Skip to content

Commit b82c452

Browse files
Feat: Implement list, set, dict, and generator comprehensions via drain-reinject with SSA slot rewriting and append opcodes
1 parent f848298 commit b82c452

7 files changed

Lines changed: 164 additions & 89 deletions

File tree

compiler/src/modules/parser/expr.rs

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// parser/expr.rs
22

33
use super::Parser;
4-
use super::types::{OpCode, Value, MAX_EXPR_DEPTH};
4+
use super::types::{OpCode, Value, MAX_EXPR_DEPTH, Instruction};
55
use super::types::parse_string;
66
use crate::modules::lexer::{Token, TokenType};
77
use alloc::{string::ToString, vec::Vec, vec, format, string::String};
@@ -183,22 +183,31 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
183183
TokenType::Lbrace => self.brace_literal(),
184184
TokenType::Lsqb => self.list_literal(),
185185
TokenType::Lpar => {
186-
if matches!(self.peek(), Some(TokenType::Rpar)) { self.advance(); self.chunk.emit(OpCode::BuildTuple, 0); } else {
187-
self.expr();
188-
if matches!(self.peek(), Some(TokenType::For)) {
189-
self.comprehension(OpCode::GenExpr);
190-
} else if self.eat_if(TokenType::Comma) {
191-
let mut count = 1u16;
192-
while !matches!(self.peek(), Some(TokenType::Rpar) | None) {
193-
self.expr();
194-
count += 1;
195-
if !self.eat_if(TokenType::Comma) { break; }
196-
}
197-
self.eat(TokenType::Rpar);
198-
self.chunk.emit(OpCode::BuildTuple, count);
186+
if matches!(self.peek(), Some(TokenType::Rpar)) {
187+
self.advance();
188+
self.chunk.emit(OpCode::BuildTuple, 0);
199189
} else {
200-
self.eat(TokenType::Rpar);
201-
}}
190+
let elem_start = self.chunk.instructions.len();
191+
self.expr();
192+
if matches!(self.peek(), Some(TokenType::For)) {
193+
let versions_before = self.ssa_versions.clone();
194+
let elem_ins: Vec<Instruction> = self.chunk.instructions.drain(elem_start..).collect();
195+
self.chunk.emit(OpCode::BuildList, 0);
196+
self.comprehension_loop(&[elem_ins], OpCode::ListAppend, &versions_before);
197+
self.advance(); // Rpar
198+
} else if self.eat_if(TokenType::Comma) {
199+
let mut count = 1u16;
200+
while !matches!(self.peek(), Some(TokenType::Rpar) | None) {
201+
self.expr();
202+
count += 1;
203+
if !self.eat_if(TokenType::Comma) { break; }
204+
}
205+
self.eat(TokenType::Rpar);
206+
self.chunk.emit(OpCode::BuildTuple, count);
207+
} else {
208+
self.eat(TokenType::Rpar);
209+
}
210+
}
202211
}
203212
TokenType::Lambda => self.parse_lambda(),
204213
_ => {

compiler/src/modules/parser/literals.rs

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
use super::Parser;
44
use super::types::builtin;
55

6-
use super::types::{OpCode, Value, SSAChunk};
6+
use super::types::{OpCode, Value, SSAChunk, Instruction};
77
use crate::modules::lexer::{Token, TokenType};
88
use alloc::{string::{String, ToString}, vec::Vec, format};
9+
use hashbrown::HashMap;
910

1011
impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
1112

@@ -20,19 +21,24 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
2021
self.chunk.emit(OpCode::BuildDict, 0);
2122
return;
2223
}
24+
let key_start = self.chunk.instructions.len();
2325
self.expr();
2426
match self.peek() {
2527
Some(TokenType::Colon) => {
2628
self.advance();
29+
let val_start = self.chunk.instructions.len();
2730
self.expr();
2831
if matches!(self.peek(), Some(TokenType::For)) {
29-
self.comprehension(OpCode::DictComp);
32+
let versions_before = self.ssa_versions.clone();
33+
let val_ins: Vec<Instruction> = self.chunk.instructions.drain(val_start..).collect();
34+
let key_ins: Vec<Instruction> = self.chunk.instructions.drain(key_start..).collect();
35+
self.chunk.emit(OpCode::BuildDict, 0);
36+
self.comprehension_loop(&[key_ins, val_ins], OpCode::MapAdd, &versions_before);
37+
self.advance(); // Rbrace
3038
} else {
3139
let mut pairs = 1u16;
3240
while self.eat_if(TokenType::Comma) {
33-
if matches!(self.peek(), Some(TokenType::Rbrace)) {
34-
break;
35-
}
41+
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
3642
self.expr();
3743
self.eat(TokenType::Colon);
3844
self.expr();
@@ -43,14 +49,16 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
4349
}
4450
}
4551
Some(TokenType::For) => {
46-
self.comprehension(OpCode::SetComp);
52+
let versions_before = self.ssa_versions.clone();
53+
let elem_ins: Vec<Instruction> = self.chunk.instructions.drain(key_start..).collect();
54+
self.chunk.emit(OpCode::BuildSet, 0);
55+
self.comprehension_loop(&[elem_ins], OpCode::SetAdd, &versions_before);
56+
self.advance(); // Rbrace
4757
}
4858
_ => {
4959
let mut count = 1u16;
5060
while self.eat_if(TokenType::Comma) {
51-
if matches!(self.peek(), Some(TokenType::Rbrace)) {
52-
break;
53-
}
61+
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
5462
self.expr();
5563
count += 1;
5664
}
@@ -71,15 +79,18 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
7179
self.chunk.emit(OpCode::BuildList, 0);
7280
return;
7381
}
82+
let elem_start = self.chunk.instructions.len();
7483
self.expr();
7584
if matches!(self.peek(), Some(TokenType::For)) {
76-
self.comprehension(OpCode::ListComp);
85+
let versions_before = self.ssa_versions.clone();
86+
let elem_ins: Vec<Instruction> = self.chunk.instructions.drain(elem_start..).collect();
87+
self.chunk.emit(OpCode::BuildList, 0);
88+
self.comprehension_loop(&[elem_ins], OpCode::ListAppend, &versions_before);
89+
self.advance(); // Rsqb
7790
} else {
7891
let mut count = 1u16;
7992
while self.eat_if(TokenType::Comma) {
80-
if matches!(self.peek(), Some(TokenType::Rsqb)) {
81-
break;
82-
}
93+
if matches!(self.peek(), Some(TokenType::Rsqb)) { break; }
8394
self.expr();
8495
count += 1;
8596
}
@@ -89,25 +100,22 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
89100
}
90101

91102
/*
92-
Comprehension Handler
93-
Generates SSA bytecode for list/set/dict/generator comprehensions.
103+
Comprehension Loop
104+
Builds for/if scaffolding and reinjects element body with SSA slot rewriting.
94105
*/
95106

96-
pub(super) fn comprehension(&mut self, op: OpCode) {
97-
let mut loop_starts = Vec::new();
98-
let mut for_iters = Vec::new();
107+
pub(super) fn comprehension_loop(&mut self, elem_bodies: &[Vec<Instruction>], append_op: OpCode, versions_before: &HashMap<String, u32>) {
108+
let mut loop_starts: Vec<u16> = Vec::new();
109+
let mut for_iters: Vec<usize> = Vec::new();
110+
let mut all_vars: Vec<String> = Vec::new();
99111

100112
while self.eat_if(TokenType::For) {
101-
let mut vars = Vec::new();
113+
let mut vars: Vec<String> = Vec::new();
102114
loop {
103115
let t = self.advance();
104116
vars.push(self.lexeme(&t).to_string());
105-
if !self.eat_if(TokenType::Comma) {
106-
break;
107-
}
108-
if matches!(self.peek(), Some(TokenType::In)) {
109-
break;
110-
}
117+
if !self.eat_if(TokenType::Comma) { break; }
118+
if matches!(self.peek(), Some(TokenType::In)) { break; }
111119
}
112120

113121
self.eat(TokenType::In);
@@ -119,42 +127,54 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
119127
let fi = self.chunk.instructions.len() - 1;
120128

121129
if vars.len() == 1 {
122-
let ver = self.increment_version(&vars[0]);
123-
let mut buf = [0u8; 128];
124-
let idx = self.chunk.push_name(Self::ssa_name(&vars[0], ver, &mut buf));
125-
self.chunk.emit(OpCode::StoreName, idx);
130+
self.store_name(vars[0].clone());
126131
} else {
127132
self.chunk.emit(OpCode::UnpackSequence, vars.len() as u16);
128133
for var in vars.iter().rev() {
129134
self.store_name(var.clone());
130135
}
131136
}
137+
for v in &vars { all_vars.push(v.clone()); }
132138

133139
while self.eat_if(TokenType::If) {
134140
self.expr_bp(1);
135-
self.chunk.emit(OpCode::JumpIfFalse, ls);
141+
self.chunk.emit(OpCode::JumpIfFalse, ls); // fail -> next iteration
136142
}
137143

138144
loop_starts.push(ls);
139145
for_iters.push(fi);
140146
}
141147

142-
let n = for_iters.len();
143-
let mut jump_positions = Vec::new();
144-
for &ls in loop_starts.iter().rev() {
145-
self.chunk.emit(OpCode::Jump, ls);
146-
jump_positions.push(self.chunk.instructions.len() - 1);
148+
// Map each loop variable's pre-loop SSA slot to its current slot; skip variables whose version is unchanged or whose pre-loop name is absent from name_index.
149+
let mut var_map: HashMap<u16, u16> = HashMap::new();
150+
for var in &all_vars {
151+
let old_ver = versions_before.get(var).copied().unwrap_or(0);
152+
let new_ver = self.current_version(var);
153+
if old_ver == new_ver { continue; }
154+
let old_name = format!("{}_{}", var, old_ver);
155+
let Some(&old_slot) = self.chunk.name_index.get(old_name.as_str()) else { continue };
156+
let mut nb = [0u8; 128];
157+
let new_slot = self.chunk.push_name(Self::ssa_name(var, new_ver, &mut nb));
158+
var_map.insert(old_slot, new_slot);
147159
}
148160

149-
for i in 1..n {
150-
let target = jump_positions[n - i] as u16;
151-
self.chunk.instructions[for_iters[i]].operand = target; // back-patch inner ForIter now that all loop headers are known
161+
for body in elem_bodies {
162+
for ins in body {
163+
let operand = if matches!(ins.opcode, OpCode::LoadName | OpCode::StoreName) {
164+
var_map.get(&ins.operand).copied().unwrap_or(ins.operand)
165+
} else {
166+
ins.operand
167+
};
168+
self.chunk.instructions.push(Instruction { opcode: ins.opcode, operand });
169+
}
152170
}
171+
self.chunk.emit(append_op, 0);
153172

154-
self.patch(for_iters[0]);
155-
156-
self.advance();
157-
self.chunk.emit(op, 0);
173+
// Close loops innermost-first: Jump back to header, patch matching ForIter to the point past it.
174+
for i in (0..for_iters.len()).rev() {
175+
self.chunk.emit(OpCode::Jump, loop_starts[i]);
176+
self.patch(for_iters[i]);
177+
}
158178
}
159179

160180
/*

compiler/src/modules/parser/types.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ pub enum OpCode {
2121
GetIter, ForIter, GetItem, Mod, Pow, FloorDiv, LoadTrue, LoadFalse, LoadNone, LoadAttr, StoreAttr,
2222
BuildSlice, MakeClass, SetupExcept, PopExcept, Raise, Import, ImportFrom, BitAnd, BitOr, BitXor,
2323
BitNot, Shl, Shr, In, NotIn, Is, IsNot, UnpackSequence, BuildTuple, SetupWith, ExitWith, Yield,
24-
Del, Assert, Global, Nonlocal, UnpackArgs, ListComp, SetComp, DictComp, BuildSet, RaiseFrom,
25-
UnpackEx, LoadEllipsis, GenExpr, Await, MakeCoroutine, YieldFrom, TypeAlias, StoreItem, Dup2
24+
Del, Assert, Global, Nonlocal, UnpackArgs, ListAppend, SetAdd, MapAdd, BuildSet, RaiseFrom,
25+
UnpackEx, LoadEllipsis, Await, MakeCoroutine, YieldFrom, TypeAlias, StoreItem, Dup2
2626
}
2727

2828
/*

compiler/src/modules/vm/mod.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,40 @@ impl<'a> VM<'a> {
513513
}
514514
}
515515

516+
// Comprehension append
517+
518+
OpCode::ListAppend => {
519+
let v = self.pop()?;
520+
let acc = *self.stack.last().ok_or_else(|| VmErr::Runtime("stack underflow"))?;
521+
if !acc.is_heap() { return Err(VmErr::Runtime("list accumulator corrupted")); }
522+
match self.heap.get(acc) {
523+
HeapObj::List(rc) => rc.borrow_mut().push(v),
524+
_ => return Err(VmErr::Runtime("list accumulator corrupted")),
525+
}
526+
}
527+
OpCode::SetAdd => {
528+
let v = self.pop()?;
529+
let acc = *self.stack.last().ok_or_else(|| VmErr::Runtime("stack underflow"))?;
530+
if !acc.is_heap() { return Err(VmErr::Runtime("set accumulator corrupted")); }
531+
let already = match self.heap.get(acc) {
532+
HeapObj::Set(rc) => rc.borrow().iter().any(|&x| eq_vals_with_heap(x, v, &self.heap)),
533+
_ => return Err(VmErr::Runtime("set accumulator corrupted")),
534+
};
535+
if !already {
536+
if let HeapObj::Set(rc) = self.heap.get(acc) { rc.borrow_mut().push(v); }
537+
}
538+
}
539+
OpCode::MapAdd => {
540+
let value = self.pop()?;
541+
let key = self.pop()?;
542+
let acc = *self.stack.last().ok_or_else(|| VmErr::Runtime("stack underflow"))?;
543+
if !acc.is_heap() { return Err(VmErr::Runtime("dict accumulator corrupted")); }
544+
match self.heap.get(acc) {
545+
HeapObj::Dict(rc) => { rc.borrow_mut().insert(key, value); }
546+
_ => return Err(VmErr::Runtime("dict accumulator corrupted")),
547+
}
548+
}
549+
516550
// SSA Phi
517551

518552
OpCode::Phi => { Self::exec_phi(op, rip, &phi_map, slots, prev_slots, &chunk.phi_sources); }
@@ -656,8 +690,6 @@ impl<'a> VM<'a> {
656690
OpCode::UnpackArgs => { return Err(VmErr::Runtime("*args/**kwargs not yet supported")); }
657691
OpCode::MakeClass => { return Err(VmErr::Runtime("classes not yet supported")); }
658692
OpCode::LoadAttr | OpCode::StoreAttr => { return Err(VmErr::Runtime("attribute access not yet supported")); }
659-
OpCode::ListComp | OpCode::SetComp | OpCode::DictComp => { return Err(VmErr::Runtime("comprehensions not yet supported")); }
660-
OpCode::GenExpr => { return Err(VmErr::Runtime("generator expressions not yet supported")); }
661693
}
662694
}
663695
}

compiler/src/wasm.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
use crate::modules::{lexer::lexer, parser::Parser, vm::{VM, Limits}};
2-
31
#[cfg(target_arch = "wasm32")]
42
mod runtime {
53
use lol_alloc::LeakingPageAllocator;
6-
use super::{lexer, Parser, VM, Limits};
4+
use crate::modules::{lexer::lexer, parser::Parser, vm::{VM, Limits}};
75

86
#[global_allocator]
97
static A: LeakingPageAllocator = LeakingPageAllocator;

compiler/tests/cases/parser_cases.json

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -859,11 +859,11 @@
859859
"annotations": {}
860860
},
861861
{
862-
"src": "x = [i for i in range(3)]",
863-
"constants": ["3"],
864-
"names": ["i_0", "i_1", "x_1"],
865-
"instructions": [["LoadName",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",7], ["StoreName",1], ["Jump",4], ["ListComp",0], ["StoreName",2], ["ReturnValue",0]],
866-
"annotations": {}
862+
"src": "x = [i for i in range(3)]",
863+
"constants": ["3"],
864+
"names": ["i_0", "i_1", "x_1"],
865+
"instructions": [["BuildList",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",9], ["StoreName",1], ["LoadName",1], ["ListAppend",0], ["Jump",4], ["StoreName",2], ["ReturnValue",0]],
866+
"annotations": {}
867867
},
868868
{
869869
"src": "x = (y := 5)",
@@ -1021,11 +1021,11 @@
10211021
"annotations": {}
10221022
},
10231023
{
1024-
"src": "x = [i for i in range(3) for j in range(2)]",
1025-
"constants": ["3", "2"],
1026-
"names": ["i_0", "i_1", "j_1", "x_1"],
1027-
"instructions": [["LoadName",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",13], ["StoreName",1], ["LoadConst",1], ["CallRange",1], ["GetIter",0], ["ForIter",12], ["StoreName",2], ["Jump",9], ["Jump",4], ["ListComp",0], ["StoreName",3], ["ReturnValue",0]],
1028-
"annotations": {}
1024+
"src": "x = [i for i in range(3) for j in range(2)]",
1025+
"constants": ["3", "2"],
1026+
"names": ["i_0", "i_1", "j_1", "x_1"],
1027+
"instructions": [["BuildList",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",15], ["StoreName",1], ["LoadConst",1], ["CallRange",1], ["GetIter",0], ["ForIter",14], ["StoreName",2], ["LoadName",1], ["ListAppend",0], ["Jump",9], ["Jump",4], ["StoreName",3], ["ReturnValue",0]],
1028+
"annotations": {}
10291029
},
10301030
{
10311031
"src": "with a, b:\n pass",
@@ -1063,17 +1063,17 @@
10631063
"annotations": {}
10641064
},
10651065
{
1066-
"src": "x = [i for i in range(10) if i > 2 if i < 8]",
1067-
"constants": ["10", "2", "8"],
1068-
"names": ["i_0", "i_1", "x_1"],
1069-
"instructions": [["LoadName",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",15], ["StoreName",1], ["LoadName",1], ["LoadConst",1], ["Gt",0], ["JumpIfFalse",4], ["LoadName",1], ["LoadConst",2], ["Lt",0], ["JumpIfFalse",4], ["Jump",4], ["ListComp",0], ["StoreName",2], ["ReturnValue",0]],
1066+
"src": "x = [i for i in range(10) if i > 2 if i < 8]",
1067+
"constants": ["10", "2", "8"],
1068+
"names": ["i_0", "i_1", "x_1"],
1069+
"instructions": [["BuildList",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",17], ["StoreName",1], ["LoadName",1], ["LoadConst",1], ["Gt",0], ["JumpIfFalse",4], ["LoadName",1], ["LoadConst",2], ["Lt",0], ["JumpIfFalse",4], ["LoadName",1], ["ListAppend",0], ["Jump",4], ["StoreName",2], ["ReturnValue",0]],
10701070
"annotations": {}
10711071
},
10721072
{
1073-
"src": "x = (i for i in range(3))",
1074-
"constants": ["3"],
1075-
"names": ["i_0", "i_1", "x_1"],
1076-
"instructions": [["LoadName",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",7], ["StoreName",1], ["Jump",4], ["GenExpr",0], ["StoreName",2], ["ReturnValue",0]],
1073+
"src": "x = (i for i in range(3))",
1074+
"constants": ["3"],
1075+
"names": ["i_0", "i_1", "x_1"],
1076+
"instructions": [["BuildList",0], ["LoadConst",0], ["CallRange",1], ["GetIter",0], ["ForIter",9], ["StoreName",1], ["LoadName",1], ["ListAppend",0], ["Jump",4], ["StoreName",2], ["ReturnValue",0]],
10771077
"annotations": {}
10781078
},
10791079
{
@@ -1541,12 +1541,12 @@
15411541
"annotations": {}
15421542
},
15431543
{
1544-
"src": "x = {k: v for k, v in items}",
1545-
"constants": [],
1546-
"names": ["k_0","v_0","items_0","v_1","k_1","x_1"],
1547-
"instructions": [["LoadName",0],["LoadName",1],["LoadName",2],["GetIter",0],["ForIter",9],["UnpackSequence",2],["StoreName",3],["StoreName",4],["Jump",4],["DictComp",0],["StoreName",5],["ReturnValue",0]],
1544+
"src": "x = {k: v for k, v in items}",
1545+
"constants": [],
1546+
"names": ["k_0", "v_0", "items_0", "v_1", "k_1", "x_1"],
1547+
"instructions": [["BuildDict",0], ["LoadName",2], ["GetIter",0], ["ForIter",11], ["UnpackSequence",2], ["StoreName",3], ["StoreName",4], ["LoadName",4], ["LoadName",3], ["MapAdd",0], ["Jump",3], ["StoreName",5], ["ReturnValue",0]],
15481548
"annotations": {}
1549-
},
1549+
},
15501550
{
15511551
"src": "a[0] = 1",
15521552
"constants": ["0","1"],

0 commit comments

Comments
 (0)