Skip to content

Commit 234b93e

Browse files
Chore: Fix errors and align docs with implementation.
1 parent 67c4eb3 commit 234b93e

11 files changed

Lines changed: 597 additions & 71 deletions

File tree

compiler/src/modules/parser/literals.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,13 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
446446

447447
pub(super) fn compile_body(&mut self, params: &[String]) -> SSAChunk {
448448
let mut body = self.with_fresh_chunk(|s| {
449-
for p in params { s.ssa_versions.insert(p.clone(), 0); }
449+
for p in params {
450+
s.ssa_versions.insert(p.clone(), 0);
451+
452+
let bare = p.trim_start_matches('*');
453+
let mut buf = [0u8; 128];
454+
let _ = s.chunk.push_name(Self::ssa_name(bare, 0, &mut buf));
455+
}
450456
s.compile_block_body();
451457
});
452458
body.is_pure = !body.instructions.iter().any(|i| matches!(

compiler/src/modules/vm/builtins.rs

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -375,27 +375,6 @@ impl<'a> VM<'a> {
375375
}
376376
} else { None };
377377

378-
// Extrae el nombre de tipo de un Val: acepta HeapObj::Type Y HeapObj::NativeFn.
379-
// NativeFn es lo que realmente vive en globals["int"] porque el loop de builtin_fns
380-
// sobreescribe al loop de BUILTIN_TYPES durante VM::new().
381-
let type_name_of = |t: Val, heap: &HeapPool| -> Option<&'static str> {
382-
if !t.is_heap() { return None; }
383-
match heap.get(t) {
384-
HeapObj::Type(_) => {
385-
// Comparamos por nombre de tipo del objeto t, no del name interno.
386-
// type_name() devuelve "type" para HeapObj::Type — usamos el campo.
387-
None // manejado en check_one abajo con String comparison
388-
}
389-
HeapObj::NativeFn(id) => {
390-
let n = id.name();
391-
if matches!(n, "int"|"str"|"float"|"bool"|"list"|"tuple"|"dict"|"set") {
392-
Some(n)
393-
} else { None }
394-
}
395-
_ => None,
396-
}
397-
};
398-
399378
let check_one = |t: Val, heap: &HeapPool| -> Result<bool, VmErr> {
400379
if !t.is_heap() {
401380
return Err(VmErr::Type("isinstance() arg 2 must be a type or tuple of types"));

compiler/tests/cases/vm.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,5 +480,9 @@
480480
{"src": "print(hasattr('hello', 'xyz'))", "output": ["False"], "result": "None"},
481481
{"src": "print(hasattr([1,2], 'append'))", "output": ["True"], "result": "None"},
482482
{"src": "m = getattr('hi', 'upper')\nprint(m())", "output": ["HI"], "result": "None"},
483-
{"src": "print(getattr('hi', 'xyz', 'default'))", "output": ["default"], "result": "None"}
483+
{"src": "print(getattr('hi', 'xyz', 'default'))", "output": ["default"], "result": "None"},
484+
{"src": "double = lambda n: n * 2\nsquare = lambda n: n * n\ndef compose(*fns):\n def piped(x):\n for f in fns:\n x = f(x)\n return x\n return piped\npipeline = compose(double, square)\nprint([pipeline(x) for x in [1, 2, 3]])", "output": ["[4, 16, 36]"], "result": "None"},
485+
{"src": "def outer(*args):\n def inner():\n return args\n return inner\nprint(outer(1, 2, 3)())", "output": ["[1, 2, 3]"], "result": "None"},
486+
{"src": "def make(x, y):\n def get_y():\n return y\n return get_y\nprint(make(99, 'hi')())", "output": ["hi"], "result": "None"},
487+
{"src": "def compose(f, g):\n return lambda x: f(g(x))\ninc = lambda n: n + 1\ndbl = lambda n: n * 2\nprint(compose(inc, dbl)(5))", "output": ["11"], "result": "None"}
484488
]

demo/packages.json

Lines changed: 0 additions & 26 deletions
This file was deleted.

demo/static/a-letter.svg

Lines changed: 0 additions & 4 deletions
This file was deleted.

demo/static/binary.svg

Lines changed: 0 additions & 7 deletions
This file was deleted.

demo/static/dices.svg

Lines changed: 0 additions & 8 deletions
This file was deleted.

demo/static/sigma.svg

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
---
2+
title: "Design"
3+
description: "Compiler architecture, dispatch model, and runtime layout."
4+
---
5+
6+
## Overview
7+
8+
Edge Python is a compact bytecode compiler and stack VM for a functional subset of Python 3.13. The release build is approximately 130 KB on `wasm32-unknown-unknown` with `panic=abort` and `opt-level=z`. The codebase is organized as a hand-written lexer, a single-pass Pratt parser that emits SSA-versioned bytecode directly, a peephole optimizer for constant folding, and a token-threaded interpreter with two layers of adaptive specialization on top.
9+
10+
There is no AST and no IR: bytecode is the only intermediate representation between source and execution.
11+
12+
## Pipeline
13+
14+
```text
15+
source bytes
16+
17+
18+
┌──────────┐
19+
│ Lexer │ LUT-driven scan, offsets into source, soft-keyword resolution
20+
└──────────┘
21+
│ (start, end, kind) tokens
22+
23+
┌──────────┐
24+
│ Parser │ Pratt precedence, SSA versioning, Phi at joins
25+
└──────────┘
26+
│ SSAChunk { instructions, constants, names, functions, classes }
27+
28+
┌──────────┐
29+
│Optimizer │ Constant folding, dead-code compaction, jump remap
30+
└──────────┘
31+
│ same SSAChunk, smaller
32+
33+
┌──────────┐
34+
│ VM │ Token-threaded dispatch, IC, template memoization, mark-sweep GC
35+
└──────────┘
36+
37+
38+
output buffer
39+
```
40+
41+
## Concepts
42+
43+
- **Offset-based tokens**: Tokens carry `(start, end, kind)` indices into the source buffer. No string copies during lexing; identifier and string content is sliced lazily by the parser.
44+
- **Single-pass SSA codegen**: Variables are versioned per assignment (`x` → `x_1`, `x_2`). Control-flow joins emit explicit `Phi` opcodes resolved at runtime.
45+
- **Token-threaded dispatch**: The instruction stream is `Vec<Instruction>` where each `Instruction` is `(opcode: OpCode, operand: u16)`. The hot loop is a flat `match` on the opcode variant. Rust lowers it to a jump table; this is *token threading*, not direct threading (computed-goto is not available in safe Rust).
46+
- **Per-instruction inline caching**: Each binary op records the type tags of its operands. After 4 stable hits the IC stores a typed `FastOp` (e.g. `AddInt`, `LtFloat`) used as a speculative fast path with a type-guard deopt.
47+
- **Template memoization**: Pure user functions cache results keyed by their argument tuple. After 2 hits the cached value short-circuits execution. Functions are statically classified as pure/impure during emission, and the runtime tightens the classification by observing `StoreItem`, `StoreAttr`, `Raise`, etc.
48+
- **NaN-boxed values**: `Val` is a 64-bit union encoding ints, floats, bools, None, and 28-bit heap indices in a single word.
49+
- **Mark-and-sweep GC**: Triggered when the heap crosses an adaptive threshold. Roots include the stack, globals, iterator frames, the current slot window, and saved live-slot snapshots.
50+
51+
## Bytecode shape
52+
53+
Each `Instruction` is 4 bytes: a 1-byte `OpCode` discriminant (with `#[repr(u8)]` planned), a 2-byte operand, and 1 byte of padding. Opcodes fall into 17 categories — load, store, arith, bitwise, compare, logic, identity, control flow, iter, build, container, comprehension, function, ssa (Phi), yield, side effects, and unsupported (raises at runtime).
54+
55+
```text
56+
OpCode::LoadConst operand = constant index
57+
OpCode::LoadName operand = name slot
58+
OpCode::StoreName operand = name slot
59+
OpCode::Add / Sub operand = 0 (IC slot derived from ip)
60+
OpCode::Call operand = (kw << 8) | pos
61+
OpCode::Phi operand = target slot, sources in chunk.phi_sources
62+
OpCode::ForIter operand = jump target on iterator exhaustion
63+
```
64+
65+
## Dispatch shape
66+
67+
The hot loop reads `cache.fused_ref()[ip]` — a snapshot of the instruction stream where adjacent `LoadAttr + Call` pairs have been fused into the `CallMethod + CallMethodArgs` superinstruction. This fusion is performed once per chunk, cached, and reused across calls.
68+
69+
For arithmetic and comparison opcodes, the loop first checks `cache.get_fast(ip)`. If a `FastOp` is present, the speculative path runs inline and pops two operands without a function call. On a type-guard miss the cache is invalidated and execution falls back to the generic handler. The IC is per-instruction, so monomorphic call sites stabilize independently.
70+
71+
## Memory model
72+
73+
`Val` is 64 bits NaN-boxed:
74+
75+
| Tag | Pattern | Notes |
76+
|-----------|-----------------------------------------|------------------------------|
77+
| Float | any non-canonical IEEE-754 | Quiet NaN remapped |
78+
| Int | `QNAN \| SIGN \| i48` | ±2⁴⁷ inline; BigInt above |
79+
| None | `QNAN \| 1` | |
80+
| True | `QNAN \| 2` | |
81+
| False | `QNAN \| 3` | |
82+
| Heap | `QNAN \| 4 \| (i28 << 4)` | 28-bit index into `HeapPool` |
83+
84+
The heap is an arena of `Option<HeapObj>` slots with a free list. Strings of 64 bytes or fewer are interned in a side hash. Integers above 2⁴⁷ are promoted to `BigInt`, a base-2³² little-endian limb array with Knuth Algorithm D for division. The garbage collector is a single-color mark-and-sweep that runs when `live > gc_threshold` or `alloc_count > max(live/4, 4096)`.
85+
86+
## What the compiler intentionally does *not* do
87+
88+
- No SSA-wide constant propagation through `LoadName`. The load is preserved because removing it pessimizes the IC, super-op, and template paths.
89+
- No CSE, GVN, LICM, inlining, or closed-form loop folding.
90+
- No dead-store elimination beyond what falls out of constant folding.
91+
- No IR — there is exactly one representation between source and dispatch.
92+
- No JIT. Edge Python stays single-tier and pure Rust. Method JITs need per-architecture stencils; trace JITs duplicate the execution model and complicate the GC contract.
93+
- No object model. `class` parses but `MakeClass` raises at runtime — the language is functional.
94+
- No module system. `import` and `from ... import` parse but raise at runtime.
95+
96+
## Architecture
97+
98+
```text
99+
src/
100+
├── lib.rs
101+
├── main.rs
102+
└── modules/
103+
├── fstr.rs format helpers without core::fmt
104+
├── fx.rs FxHashMap / FxHashSet (no_std hasher)
105+
├── lexer/
106+
│ ├── mod.rs public Token / TokenType, lexer entry
107+
│ ├── scan.rs byte-level scanner state machine
108+
│ └── tables.rs BYTE_CLASS, SINGLE_TOK, keyword LUT
109+
├── parser/
110+
│ ├── mod.rs Parser struct, SSA join logic, error recovery
111+
│ ├── expr.rs Pratt precedence climbing, postfix tails
112+
│ ├── stmt.rs statement dispatch, name_stmt with augmented assign
113+
│ ├── control.rs if / for / while / try / with / match / import
114+
│ ├── literals.rs list / dict / set / fstring / call / params
115+
│ └── types.rs OpCode, SSAChunk, Diagnostic, Value
116+
└── vm/
117+
├── mod.rs VM struct, exec loop, dispatch, GC roots
118+
├── cache.rs OpcodeCache (IC), Templates (memoization), method fusion
119+
├── optimizer.rs constant folding pass + jump remap
120+
├── ops.rs binop kernels, equality, truthiness, type tag
121+
├── types.rs Val, HeapObj, BigInt, DictMap, VmErr, Limits
122+
├── builtins.rs built-in function bodies (print, len, abs, ...)
123+
└── handlers/
124+
├── arith.rs Add, Sub, Mul, Div, Mod, Pow, FloorDiv, Minus, BitOps, Compare, Logic
125+
├── data.rs Store, Build, Container, Comprehension, Yield, Side
126+
├── function.rs Call, MakeFunction, exec_call, dispatch_native
127+
└── methods.rs string / list / dict method tables, dispatch_method
128+
```
129+
130+
## Capabilities
131+
132+
| Types | Control flow | Built-ins | Lexical |
133+
|--------|------------------|-------------------|-----------------|
134+
| int | if / elif / else | I/O | indentation |
135+
| float | for / while | type conversion | f-string |
136+
| str | match / case | introspection | walrus operator |
137+
| bool | functions | iteration | comments |
138+
| list | lambdas | aggregation | docstrings |
139+
| dict | generators | math | underscore |
140+
| tuple | comprehensions | sequence ops | complex numbers |
141+
| set | try / except | logical reduction | escape sequences|
142+
| range | with | number formatting | - |
143+
| None | async / await¹ | - | - |
144+
| BigInt | yield / yield from | - | - |
145+
146+
¹ async syntax parses and emits `MakeCoroutine` for compatibility, but there is no event loop — coroutines run synchronously.
147+
148+
## References
149+
150+
1. Aho, Sethi & Ullman. *Compilers: Principles, Techniques and Tools* (1986). LUT-based lexer.
151+
2. Pratt. *Top Down Operator Precedence* (POPL 1973).
152+
3. Cytron et al. *Efficiently Computing Static Single Assignment Form and the Control Dependence Graph* (TOPLAS 1991).
153+
4. Gudeman. *Representing Type Information in Dynamically Typed Languages* (1993). NaN-boxing.
154+
5. Deutsch & Schiffman. *Efficient Implementation of the Smalltalk-80 System* (POPL 1984). Inline caching.
155+
6. Ertl & Gregg. *The Structure and Performance of Efficient Interpreters* (JILP 2003). Threaded dispatch.
156+
7. Casey et al. *Towards Superinstructions for Java Interpreters* (SCOPES 2003). LoadAttr+Call fusion.
157+
8. Michie. *Memo Functions and Machine Learning* (Nature 1968). Pure-function memoization.
158+
9. McCarthy. *Recursive Functions of Symbolic Expressions and Their Computation by Machine, Part I* (CACM 1960). Mark-sweep GC.
159+
10. Knuth. *The Art of Computer Programming, Vol. 2* (1981). Algorithm D for BigInt division.
160+
11. Backus. *Can Programming Be Liberated from the von Neumann Style?* (CACM 1978). Function-level paradigm.

0 commit comments

Comments
 (0)