Skip to content

Commit 22e2959

Browse files
feat(lang): collection unpacking, sequence ordering, raise TypeError.
2 parents b42f901 + 67f3229 commit 22e2959

12 files changed

Lines changed: 215 additions & 34 deletions

File tree

compiler/src/modules/parser/expr.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,10 +294,10 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
294294
}
295295
}
296296

297-
/* Postfix trailers: `.attr`, [i], [s:e], (args), chained. */
297+
/* Postfix trailers: `.attr`, [i], [s:e], (args), chained. A trailer must start on the same line, so a statement boundary ends the chain (else `x = []` ⏎ `[i]` parses as `[][i]`). */
298298
pub(super) fn postfix_tail(&mut self) {
299299
loop {
300-
match self.peek() {
300+
match self.peek_same_line() {
301301
Some(TokenType::Lsqb) => {
302302
self.advance();
303303
let is_slice = matches!(self.peek(), Some(TokenType::Colon));

compiler/src/modules/parser/literals.rs

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
1818
self.chunk.emit(OpCode::BuildDict, 0);
1919
return;
2020
}
21+
// `{**m, ...}`: leading mapping-unpack => dict built incrementally.
22+
if self.eat_if(TokenType::DoubleStar) {
23+
self.chunk.emit(OpCode::BuildDict, 0);
24+
self.expr();
25+
self.chunk.emit(OpCode::DictUpdate, 0);
26+
self.dict_tail(0, true);
27+
return;
28+
}
29+
// `{*s, ...}`: leading iterable-unpack => set built incrementally.
30+
if self.eat_if(TokenType::Star) {
31+
self.chunk.emit(OpCode::BuildSet, 0);
32+
self.expr();
33+
self.chunk.emit(OpCode::SetUpdate, 0);
34+
self.set_tail(0, true);
35+
return;
36+
}
2137
let key_start = self.chunk.instructions.len();
2238
self.expr();
2339
match self.peek() {
@@ -33,16 +49,8 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
3349
self.comprehension_loop(&[key_ins, val_ins], OpCode::MapAdd, &versions_before);
3450
self.eat(TokenType::Rbrace);
3551
} else {
36-
let mut pairs = 1u16;
37-
while self.eat_if(TokenType::Comma) {
38-
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
39-
self.expr();
40-
self.eat(TokenType::Colon);
41-
self.expr();
42-
pairs += 1;
43-
}
44-
self.eat(TokenType::Rbrace);
45-
self.chunk.emit(OpCode::BuildDict, pairs);
52+
// First pair already emitted; dict_tail consolidates if a later `**` appears.
53+
self.dict_tail(1, false);
4654
}
4755
}
4856
Some(TokenType::For) => {
@@ -53,14 +61,8 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
5361
self.eat(TokenType::Rbrace);
5462
}
5563
_ => {
56-
let mut count = 1u16;
57-
while self.eat_if(TokenType::Comma) {
58-
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
59-
self.expr();
60-
count += 1;
61-
}
62-
self.eat(TokenType::Rbrace);
63-
self.chunk.emit(OpCode::BuildSet, count);
64+
// First element already emitted; set_tail consolidates if a later `*` appears.
65+
self.set_tail(1, false);
6466
}
6567
}
6668
}
@@ -72,6 +74,14 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
7274
self.chunk.emit(OpCode::BuildList, 0);
7375
return;
7476
}
77+
// `[*it, ...]`: leading iterable-unpack => list built incrementally.
78+
if self.eat_if(TokenType::Star) {
79+
self.chunk.emit(OpCode::BuildList, 0);
80+
self.expr();
81+
self.chunk.emit(OpCode::ListExtend, 0);
82+
self.list_tail(0, true);
83+
return;
84+
}
7585
let elem_start = self.chunk.instructions.len();
7686
self.expr();
7787
if matches!(self.peek(), Some(TokenType::For)) {
@@ -81,15 +91,62 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
8191
self.comprehension_loop(&[elem_ins], OpCode::ListAppend, &versions_before);
8292
self.eat(TokenType::Rsqb);
8393
} else {
84-
let mut count = 1u16;
85-
while self.eat_if(TokenType::Comma) {
86-
if matches!(self.peek(), Some(TokenType::Rsqb)) { break; }
94+
// First element already emitted; list_tail consolidates if a later `*` appears.
95+
self.list_tail(1, false);
96+
}
97+
}
98+
99+
/* Finishes a `{}` dict after the first pair. `pairs` = loose key/val pairs on the stack; `incremental` = a Dict object is already on the stack. On the first `**` the loose pairs are consolidated with `BuildDict pairs`, then merges use `DictUpdate`/`MapAdd`. */
100+
fn dict_tail(&mut self, mut pairs: u16, mut incremental: bool) {
101+
while self.eat_if(TokenType::Comma) {
102+
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
103+
if self.eat_if(TokenType::DoubleStar) {
104+
if !incremental { self.chunk.emit(OpCode::BuildDict, pairs); incremental = true; }
87105
self.expr();
88-
count += 1;
106+
self.chunk.emit(OpCode::DictUpdate, 0);
107+
} else {
108+
self.expr();
109+
self.eat(TokenType::Colon);
110+
self.expr();
111+
if incremental { self.chunk.emit(OpCode::MapAdd, 0); } else { pairs += 1; }
112+
}
113+
}
114+
self.eat(TokenType::Rbrace);
115+
if !incremental { self.chunk.emit(OpCode::BuildDict, pairs); }
116+
}
117+
118+
/* Finishes a `{}` set after the first element; mirrors `dict_tail` with `SetUpdate`/`SetAdd` and `BuildSet`. */
119+
fn set_tail(&mut self, mut count: u16, mut incremental: bool) {
120+
while self.eat_if(TokenType::Comma) {
121+
if matches!(self.peek(), Some(TokenType::Rbrace)) { break; }
122+
if self.eat_if(TokenType::Star) {
123+
if !incremental { self.chunk.emit(OpCode::BuildSet, count); incremental = true; }
124+
self.expr();
125+
self.chunk.emit(OpCode::SetUpdate, 0);
126+
} else {
127+
self.expr();
128+
if incremental { self.chunk.emit(OpCode::SetAdd, 0); } else { count += 1; }
129+
}
130+
}
131+
self.eat(TokenType::Rbrace);
132+
if !incremental { self.chunk.emit(OpCode::BuildSet, count); }
133+
}
134+
135+
/* Finishes a `[]` list after the first element; mirrors `dict_tail` with `ListExtend`/`ListAppend` and `BuildList`. */
136+
fn list_tail(&mut self, mut count: u16, mut incremental: bool) {
137+
while self.eat_if(TokenType::Comma) {
138+
if matches!(self.peek(), Some(TokenType::Rsqb)) { break; }
139+
if self.eat_if(TokenType::Star) {
140+
if !incremental { self.chunk.emit(OpCode::BuildList, count); incremental = true; }
141+
self.expr();
142+
self.chunk.emit(OpCode::ListExtend, 0);
143+
} else {
144+
self.expr();
145+
if incremental { self.chunk.emit(OpCode::ListAppend, 0); } else { count += 1; }
89146
}
90-
self.eat(TokenType::Rsqb);
91-
self.chunk.emit(OpCode::BuildList, count);
92147
}
148+
self.eat(TokenType::Rsqb);
149+
if !incremental { self.chunk.emit(OpCode::BuildList, count); }
93150
}
94151

95152
/* Emits for/if comprehension scaffolding; reinjects body with loop-bound SSA slots. */

compiler/src/modules/parser/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,17 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
339339
}
340340
}
341341

342+
/* Like `peek` but stops at the logical-line boundary: a `Newline` yields `None` (unconsumed) so postfix trailers don't bind across statements. `Nl`/`Comment` (bracket-internal) still skip, so multiline `()`/`[]`/`{}` are unaffected. */
343+
pub(super) fn peek_same_line(&mut self) -> Option<TokenType> {
344+
loop {
345+
match self.tokens.peek().map(|t| t.kind) {
346+
Some(TokenType::Nl | TokenType::Comment) => { self.tokens.next(); }
347+
Some(TokenType::Newline) | Some(TokenType::Endmarker) | None => return None,
348+
Some(k) => return Some(k),
349+
}
350+
}
351+
}
352+
342353
pub(super) fn patch(&mut self, pos: usize) {
343354
self.chunk.instructions[pos].operand = self.chunk.instructions.len() as u16;
344355
}

compiler/src/modules/parser/types.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub enum OpCode {
3030
LoadModule,
3131
/* Read/write a `global`-declared name from/to `self.globals`; operand indexes the bare name in `chunk.names`. */
3232
LoadGlobal, StoreGlobal,
33+
/* Literal unpacking: pop a source value and merge it into the container left below it on the stack. `{**m}` / `{*s}` / `[*it]`. */
34+
DictUpdate, SetUpdate, ListExtend,
3335
}
3436

3537
// Python builtin name -> (specialised OpCode, `leaves_value_on_stack`).

compiler/src/modules/vm/dispatch.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ impl<'a> VM<'a> {
514514
OpCode::UnpackSequence | OpCode::UnpackEx | OpCode::FormatValue => self.handle_container(opcode, operand, chunk, slots)?,
515515

516516
OpCode::ListAppend | OpCode::SetAdd | OpCode::MapAdd => self.handle_comprehension(opcode)?,
517+
OpCode::DictUpdate | OpCode::SetUpdate | OpCode::ListExtend => self.handle_spread_merge(opcode)?,
517518

518519
OpCode::Yield => self.handle_yield()?,
519520
OpCode::LoadEllipsis => {

compiler/src/modules/vm/handlers/data.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,46 @@ impl<'a> VM<'a> {
124124
Ok(())
125125
}
126126

127+
/* Merge the source on top of the stack into the container below it: `{**m}`, `{*s}`, `[*it]`. */
128+
pub(crate) fn handle_spread_merge(&mut self, op: OpCode) -> Result<(), VmErr> {
129+
let src = self.pop()?;
130+
let acc = *self.stack.last().ok_or(VmErr::Runtime("stack underflow"))?;
131+
if !acc.is_heap() { return Err(cold_runtime("spread accumulator corrupted")); }
132+
match op {
133+
OpCode::DictUpdate => {
134+
// `**` requires a mapping; later keys overwrite earlier ones.
135+
let pairs: Vec<(Val, Val)> = match self.heap.get(src) {
136+
HeapObj::Dict(rc) => rc.borrow().iter().collect(),
137+
_ => return Err(cold_type("argument after ** must be a mapping")),
138+
};
139+
if let HeapObj::Dict(rc) = self.heap.get(acc) {
140+
let mut m = rc.borrow_mut();
141+
for (k, v) in pairs { m.insert(k, v); }
142+
}
143+
}
144+
OpCode::SetUpdate => {
145+
let items = self.iter_to_vec_for_spread(src)?;
146+
for it in items {
147+
self.require_hashable(it)?;
148+
let dup = match self.heap.get(acc) {
149+
HeapObj::Set(rc) => rc.borrow().iter().any(|&x| eq_vals_with_heap(x, it, &self.heap)),
150+
_ => return Err(cold_runtime("spread accumulator corrupted")),
151+
};
152+
if !dup && let HeapObj::Set(rc) = self.heap.get(acc) { rc.borrow_mut().insert(it); }
153+
}
154+
}
155+
OpCode::ListExtend => {
156+
let items = self.iter_to_vec_for_spread(src)?;
157+
match self.heap.get(acc) {
158+
HeapObj::List(rc) => rc.borrow_mut().extend(items),
159+
_ => return Err(cold_runtime("spread accumulator corrupted")),
160+
}
161+
}
162+
_ => return Err(cold_runtime("non-spread opcode in handle_spread_merge")),
163+
}
164+
Ok(())
165+
}
166+
127167
/* Yield: keep the value on the stack and flag the executor to suspend. */
128168
pub(crate) fn handle_yield(&mut self) -> Result<(), VmErr> {
129169
let v = self.pop()?;
@@ -180,7 +220,8 @@ impl<'a> VM<'a> {
180220
let msg = match info {
181221
Some((n, Some(arg))) => { let detail = self.display(arg); crate::s!(str &n, ": ", str &detail) }
182222
Some((n, None)) => n,
183-
None => self.display(exc),
223+
// Non-exception value (str, int, ...): raises TypeError, catchable by `except Exception`.
224+
None => crate::s!("TypeError: exceptions must derive from BaseException"),
184225
};
185226
return Err(VmErr::Raised(msg));
186227
}

compiler/src/modules/vm/ops.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,13 +262,34 @@ impl<'a> VM<'a> {
262262
if let Some((af, bf)) = coerce_floats(a, b) { return Ok(af < bf); }
263263
// Wide-int compare in i128; falls through when either side isn't int-like.
264264
if let (Some(ai), Some(bi)) = (as_i128(a, &self.heap), as_i128(b, &self.heap)) { return Ok(ai < bi); }
265-
if a.is_heap() && b.is_heap()
266-
&& let (HeapObj::Str(x), HeapObj::Str(y)) = (self.heap.get(a), self.heap.get(b)) {
267-
return Ok(x < y);
265+
if a.is_heap() && b.is_heap() {
266+
match (self.heap.get(a), self.heap.get(b)) {
267+
(HeapObj::Str(x), HeapObj::Str(y)) => return Ok(x < y),
268+
(HeapObj::Bytes(x), HeapObj::Bytes(y)) => return Ok(x < y),
269+
// Sequences compare lexicographically; clone to drop the heap borrow before recursing.
270+
(HeapObj::List(x), HeapObj::List(y)) => {
271+
let (x, y) = (x.borrow().clone(), y.borrow().clone());
272+
return self.seq_lt(&x, &y);
273+
}
274+
(HeapObj::Tuple(x), HeapObj::Tuple(y)) => {
275+
let (x, y) = (x.clone(), y.clone());
276+
return self.seq_lt(&x, &y);
277+
}
278+
_ => {}
279+
}
268280
}
269281
Err(VmErr::TypeMsg(s!("'<' not supported between instances of '", str self.type_name(a), "' and '", str self.type_name(b), "'")))
270282
}
271283

284+
/* Lexicographic `<` for sequences: first differing element decides; otherwise the shorter is less. Recurses through `lt_vals`, so nested sequences and mixed element types are handled (and rejected) consistently. */
285+
pub fn seq_lt(&self, xs: &[Val], ys: &[Val]) -> Result<bool, VmErr> {
286+
for (&x, &y) in xs.iter().zip(ys.iter()) {
287+
if eq_vals_with_heap(x, y, &self.heap) { continue; }
288+
return self.lt_vals(x, y);
289+
}
290+
Ok(xs.len() < ys.len())
291+
}
292+
272293
/* Item presence in list/tuple/dict/set, or substring in string. */
273294
pub fn contains(&self, container: Val, item: Val) -> bool {
274295
if !container.is_heap() { return false; }

compiler/tests/cases/vm.json

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1750,5 +1750,30 @@
17501750
{"src": "async def child():\n receive()\n return receive()\ntry:\n r = run(child())\n print(f'ok:{r}')\nexcept Exception as e:\n print(f'err:{e}')", "interactive_events": ["a", "b"], "output": ["ok:b"]},
17511751
{"src": "def f():\n return 'x'\nprint(f'val:{f()}')", "output": ["val:x"]},
17521752
{"src": "print(f'got:{receive()}')", "events": ["hi"], "output": ["got:hi"]},
1753-
{"src": "async def child():\n return 'x'\nprint(f'got:{run(child())}')", "output": ["got:x"]}
1753+
{"src": "async def child():\n return 'x'\nprint(f'got:{run(child())}')", "output": ["got:x"]},
1754+
{"src": "print(len({**{1:1}, **{2:2}}))", "output": ["2"]},
1755+
{"src": "print({**{'a':1}, **{'a':9}})", "output": ["{'a': 9}"]},
1756+
{"src": "print({'x':1, **{'y':2}})", "output": ["{'x': 1, 'y': 2}"]},
1757+
{"src": "print({**{'a':1}, 'b':2})", "output": ["{'a': 1, 'b': 2}"]},
1758+
{"src": "d = {'a': 1}\nprint({**d, 'b': 2, **{'c': 3}})", "output": ["{'a': 1, 'b': 2, 'c': 3}"]},
1759+
{"src": "print([*[1, 2], 3, *[4, 5]])", "output": ["[1, 2, 3, 4, 5]"]},
1760+
{"src": "print([*[1, 2], 3])", "output": ["[1, 2, 3]"]},
1761+
{"src": "print([0, *range(3)])", "output": ["[0, 0, 1, 2]"]},
1762+
{"src": "print(len({*[1, 2], *[2, 3]}))", "output": ["3"]},
1763+
{"src": "print(sorted({*[3, 1], *[1, 2]}))", "output": ["[1, 2, 3]"]},
1764+
{"src": "try:\n raise 'soy un string'\nexcept Exception as e:\n print(type(e).__name__)", "output": ["TypeError"]},
1765+
{"src": "try:\n raise 42\nexcept Exception as e:\n print('caught', type(e).__name__)", "output": ["caught TypeError"]},
1766+
{"src": "try:\n raise 'x'\nexcept TypeError:\n print('ok')", "output": ["ok"]},
1767+
{"src": "x = [1, 2, 3]\n[print(i) for i in range(2)]", "output": ["0", "1"]},
1768+
{"src": "xs = []\n[xs.append(i) for i in range(3)]\nprint(xs)", "output": ["[0, 1, 2]"]},
1769+
{"src": "d = {'a': 1}\n[print(d[k]) for k in d]", "output": ["1"]},
1770+
{"src": "n = 5\n[0, 1, 2]\nprint(n)", "output": ["5"]},
1771+
{"src": "x = (\n 1 + 2 +\n 3\n)\nprint(x)", "output": ["6"]},
1772+
{"src": "x = [\n 1,\n 2,\n]\nprint(x)", "output": ["[1, 2]"]},
1773+
{"src": "[print(x) for x in range(2)]\nd = {}\n[d.update({x: x}) for x in range(2)]\nprint(d[0], d[1])\nxs = []\n[xs.append(x) for x in range(2)]\nprint(xs)", "output": ["0", "1", "0 1", "[0, 1]"]},
1774+
{"src": "print(sorted([(1, 2), (1, 1), (0, 9)]))", "output": ["[(0, 9), (1, 1), (1, 2)]"]},
1775+
{"src": "print([3, 1] < [3, 2])", "output": ["True"]},
1776+
{"src": "print((1, 2, 3) < (1, 2))", "output": ["False"]},
1777+
{"src": "print(b'abc' < b'abd')", "output": ["True"]},
1778+
{"src": "print(sorted([[2], [1], [1, 0]]))", "output": ["[[1], [1, 0], [2]]"]}
17541779
]

docs/pages/language/data-types.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,25 @@ set()
317317
True
318318
```
319319

320+
## Unpacking in literals
321+
322+
`*` spreads an iterable into a list/set literal; `**` spreads a mapping into a dict literal. Mix freely with regular elements; for dicts, later keys win.
323+
324+
```python
325+
xs = [1, 2]
326+
print([*xs, 3, *xs]) # list spread
327+
print({*xs, 2, 3}) # set spread (deduped)
328+
329+
a = {"x": 1}
330+
print({**a, "y": 2, **{"x": 9}}) # dict spread, later key wins
331+
```
332+
333+
```text Output
334+
[1, 2, 3, 1, 2]
335+
{1, 2, 3}
336+
{'x': 9, 'y': 2}
337+
```
338+
320339
## Range
321340

322341
Lazy integer sequence. `range(stop)`, `range(start, stop)`, `range(start, stop, step)`.

docs/pages/language/syntax.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,16 +203,20 @@ print(-5, +5)
203203

204204
### Comparison and chaining
205205

206+
Ordering comparisons (`<`, `>`, `<=`, `>=`) work on numbers, strings, bytes, and tuples/lists (compared lexicographically); mixing un-orderable types raises `TypeError`.
207+
206208
```python
207209
print(1 < 2 < 3) # chained
208210
print(0 < 5 < 10)
209211
print(1 == 1 == 1)
212+
print([1, 2] < [1, 3]) # lexicographic
210213
```
211214

212215
```text Output
213216
True
214217
True
215218
True
219+
True
216220
```
217221

218222
### Logical

0 commit comments

Comments
 (0)