diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..451c437 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[env] +LLVM_SYS_181_PREFIX = "/usr/lib/llvm18" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b1ba031 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,28 @@ +* text=auto + +*.rs text eol=lf +*.toml text eol=lf +*.md text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.json text eol=lf +*.aelys text eol=lf +*.aasm text eol=lf +*.sh text eol=lf + +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.pdf binary +*.dll binary +*.lib binary +*.a binary +*.so binary +*.dylib binary +*.exe binary + +# exclude from github language detection +*.inc linguist-vendored +*.cpp linguist-vendored diff --git a/.gitignore b/.gitignore index fff8465..0d1f131 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,17 @@ Cargo.lock /aelys/target/ \target .idea/ -AELYS_TECHNICAL_REFERENCE.md \ No newline at end of file +architecture.md +backend_status.md +aelys/tests/exploration/* +*.ps1 +**/*.obj +**/*.exe +main.aelys +main.ll +torture/ +audit/** +audit_tmp/** +.vs/** +.idea/** +tests_e2e/** \ No newline at end of file diff --git a/ACKNOWLEDGEMENTS.md b/ACKNOWLEDGEMENTS.md index 9e2255d..84c3523 100644 --- a/ACKNOWLEDGEMENTS.md +++ b/ACKNOWLEDGEMENTS.md @@ -1,35 +1,17 @@ # Acknowledgements -## Helps and Contributions +## Helps and Indirect Contributions -**[Keggek](https://codeberg.org/gek)** - For the discussions about the project and language design <3 -**[Lekebabiste](https://github.com/Lekebabiste)** - For helping with the UDP implementation <3 -**[SpaceGame](https://github.com/SpaceGame-wq)** - For making an Aelys syntax highlighting [VSCode extension](https://marketplace.visualstudio.com/items?itemName=SpaceGame.aelys-lang) <3 - -## Inspirations - -**Rust** - The syntax style, `let mut` for mutability, range expressions (`..` and `..=`). 
-Also happens to be what Aelys is written in lol - -**Go** - Automatic semicolon insertion, the philosophy of simplicity, fast compilation. - -**Lua** - Lightweight VM design, embeddability goals. The original inspiration for trying to build something small and fast. - -**Python** - Readability focus. The `and`/`or`/`not` keywords. - -Honestly I really want do make a language that feels like a blend of all these things, taking the best ideas from each. -Some sort of « python but that treats you as an adult » +**[Keggek](https://codeberg.org/gek)** - For the discussions about the project and language design. You are more than a mentor, thank you. --- -## Usage of AI - -Debugging sessions, architectural discussions, and keeping me sane when the VM decides to just.. not work. +**[SpaceGame](https://github.com/SpaceGame-wq)** - For making an Aelys syntax highlighting [VSCode extension](https://marketplace.visualstudio.com/items?itemName=SpaceGame.aelys-lang) -AI also wrote most of the tests for Aelys, some stuff in the examples/ folder (notably the benchmarks), and also some parts of the stdlib, which saved a lot of time. -I prefer not to focus too much on that and instead work on the VM rather than anything else +## Inspirations +TODO: -Without AI assistance this would've taken 10x longer, maybe more. +## Usage of AI -This whole thing started as a way to actually understand how compilers work beyond just reading about them, and turns out building one is the best way to learn +TODO: \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f111580..abe3ce0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,43 @@ All notable changes to Aelys, roughly grouped by version. 
I don't always tag releases perfectly, so this is reconstructed from git history +## 0.21.x - LLVM backend implementation + +**0.21.5-a** +- LLVM codegen: Windows x64 MSVC sret ABI fix for struct returns (fixes bootstrap println segfault) +- string comparison (==, !=), logical not (!), `println` accepts i64/f64/bool (bootstrap to_string) +- Delete the legacy VM backend + +#### Note about that : + +The previous VM served its purpose for prototyping, but Aelys is a systems programming language, not a scripting engine. + +A compact alternative VM will return later once LLVM is mature enough (meta-programming?) + +**0.21.4-a** +- LLVM codegen: SSA-aware local lowering, no more round-tripping with alloca/store/load +- LLVM backend hardening, no panic/todo path now, unsupported AIR node will give a compile time error +- Stable native entrypoint for LLVM: `__aelys_user_main` + C runtime main + +**0.21.3-a** +- Hardened Aelys string ABI: `str` is now consistently lowered as `{ ptr, len }` +- Temporary `print/println` bootstrap, they're reserved during LLVM bootstrap and lowered to `__aelys_write(ptr, len)`; added regression tests for IR ABI and internal `\0` handling. +- Added `s.len` field access on `Str` (until proper bootstrapping) + +**0.21.2-a** +- Added `core/` crate (`aelys-core`) as a C runtime static library built via `cc` (`build.rs` + `src/aelys_core.c`). +- LLVM native linking now auto-resolves and links `aelys-core` for `--backend llvm` executables. + +**0.21.1-a** +- AIR pass `copy_elim`: eliminates parameter -> local copies under single assignment. +- AIR pass `dead_locals`: removes `AirLocal`s that are never referenced. + +**0.21.0-a** +- LLVM backend bootstrap: inkwell integration, AIR->LLVM type lowering, function declaration/body codegen, `module.verify()`, and `--backend llvm --emit-llvm-ir`. 
+ ## 0.20.x - Preparing for LLVM -Groundwork for LLVM: sized types, structs, generics, monomorphization, and a new intermediate representation (AIR) with System V AMD64 layout. Nothing implemented in the VM though. I'd rather focus on the new backend than on that. +Groundwork for LLVM: sized types, structs, generics, monomorphization, and a new intermediate representation (AIR) with System V AMD64 layout. Nothing implemented in the VM though. I'd rather focus on the new backend than on that. **0.20.4-a** - AIR pretty-printer, `--emit-air` CLI flag for `compile` command @@ -60,7 +94,7 @@ Language maturity: arrays, vecs, compound operators, dot-syntax string methods, - String indexing with `s[i]` (unicode-aware, returns single-character string) - `for c in "hello" { }` iteration syntax -**0.19.10-a** (i'll squash all of these updates) +**0.19.10-a** - fn foo(mut param: type) now working **0.19.9-a** (not a "real" update again sorry, needed a new tag @@ -109,7 +143,7 @@ Language maturity: arrays, vecs, compound operators, dot-syntax string methods, ## 0.18.x - Native Binary Data Manipulation -This update adds real memory manipulation for @no_gc mode +This update adds real memory manipulation for @no_gc mode & some bug fixes **0.18.6-a** - Fixed call site cache using stale entries after global mutation diff --git a/Cargo.toml b/Cargo.toml index 34ab0f8..38fd831 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,24 +2,20 @@ resolver = "3" members = [ "aelys", + "core", "common", "syntax", "frontend", "sema", "opt", "air", - "bytecode", - "backend", - "runtime", - "modules", + "codegen", "driver", "cli", - "native", - "native-macros", ] [workspace.package] -version = "0.20.3-a" +version = "0.21.8-a" [profile.release] lto = "fat" @@ -30,4 +26,4 @@ strip = true debug = false overflow-checks = false debug-assertions = false -rpath = false \ No newline at end of file +rpath = false diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..256a98c --- 
/dev/null +++ b/NOTICE @@ -0,0 +1,4 @@ +This project uses LLVM (https://llvm.org/). +Copyright (c) 2003-present, LLVM Contributors. +Licensed under the Apache License 2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. \ No newline at end of file diff --git a/README.md b/README.md index d0bd251..fa65d81 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,119 @@

- aelys virtual machine + aelys

-CI -License -Release +# Aelys -# aelys 0.20.3-a +A programming language with garbage collection by default and explicit opt-out for performance-critical code. -Register-based VM with dual memory management: GC by default, `@no_gc` for performance-critical code. +Most languages force a single memory model on the entire program. GC'd languages pay for a runtime on every path; systems languages demand manual control everywhere. -You choose between comfort and performance on a per-function basis. +Aelys starts from a tracing GC and lets you leave it behind one function at a time. Default mode gives you heap allocation, type inference, and minimal annotation. `nogc` gives you compiler-enforced zero-allocation with statically checked references, no user-written lifetime annotations. -## Documentation + +```rust +fn load_mesh(path: str) -> Result { + let data = read_file(path) fail |e| IoError::from(e) + let points = parse_vertices(data) fail IoError::ParseFailed + Ok(Mesh { vertices: points }) +} -- [Build Instructions](docs/installation.md) -- [Getting Started Guide](docs/getting-started.md) -- [Language Specification](docs/language-spec.md) -- [Standard Library Documentation](docs/standard-library.md) +nogc fn compute_normals(vertices: &[Vec3], normals: &mut [Vec3]) { + for i in range(0, vertices.len() - 2, step: 3) { + let e1 = vertices[i + 1] - vertices[i] + let e2 = vertices[i + 2] - vertices[i] + normals[i / 3] = cross(e1, e2).normalize() + } +} -## Additional Information +fn main() { + let mesh = match load_mesh("model.obj") or load_mesh("default.obj") { + Ok(m) => m, + Err(IoError::NotFound) => { println("no mesh found"); return }, + Err(IoError::ParseFailed) => { println("mesh is corrupted"); return } + } -- [Performance Benchmarks](docs/performance-benchmarks.md) -- [Acknowledgements](ACKNOWLEDGEMENTS.md) -- [Changelog](CHANGELOG.md) -- [Examples](examples/README.md) -- [License](LICENSE) -- [FAQ](docs/faq.md) + let mut normals = Vec::new(mesh.vertices.len() / 
3) + compute_normals(&mesh.vertices, &mut normals) +} +``` + +`load_mesh` is default mode: GC-backed allocation, `Result` for errors, `fail` for propagation, `or` for fallback. `compute_normals` is `nogc`, meaning no heap allocation, no GC containers, and references checked at compile time. In `main`, `&` at the call site marks the boundary where data is borrowed into `nogc` territory. + +GC code calls `nogc` freely; the reverse is a compile error. + +Beyond the memory model: inferred types, pattern matching, Result-based error handling with `fail` for propagation and `or` for fallback composition, direct C header imports through `needs`, and compilation to native code via LLVM. + +## Memory model + +The language provides three levels of control, each narrowing what the runtime provides. + +**Default mode.** A tracing garbage collector manages the heap, types are inferred, and standard collections are GC-backed. Type annotations are optional. `fail` propagates errors, either through a closure for remapping or directly for a fixed error, and `or` provides fallback between results. This is the intended level for most code. + +
+ +**`nogc` functions.** A function-level opt-out from GC allocation. Inside a `nogc` function, GC-backed allocation is rejected at compile time, GC-managed containers cannot be created, references are checked for escape and aliasing violations, and calls into GC code are rejected. + +The compiler proves that the function satisfies these constraints or rejects it. No warnings, no user-written lifetime annotations. `unsafe {}` permits operations the checker cannot validate statically but does not re-enable GC allocation. FFI is handled through `needs`, which imports C headers directly. + +```rust +needs "GL/glext.h" + +nogc fn upload_normals(buffer_id: u32, normals: &[Vec3]) { + let byte_size = (normals.len() * size_of(Vec3)) as isize + unsafe { + glNamedBufferData( + buffer_id, + byte_size, + normals.as_ptr() as *const void, + GL_STATIC_DRAW + ) + } +} +``` + +
+ +**`#![no_gc]` `#![no_std]` modules.** At the module level, these attributes remove the garbage collector, runtime, and standard library entirely. The language exposes raw pointers, `extern fn`, and inline assembly, while parsing, typing, and semantic analysis still apply. This mode is intended for kernels, boot code, and freestanding targets. + +```rust +#![no_gc] +#![no_std] + +extern { + static __bss_start: u8 + static __bss_end: u8 +} + +fn zero_bss() { + let mut ptr = &__bss_start as *const u8 as *mut u8 + let end = &__bss_end as *const u8 + while ptr < end { + *ptr = 0u8 + ptr = ptr + 1 + } +} + +#[no_mangle] +fn kernel_entry() -> ! { + zero_bss() + uart::write("boot ok\n") + loop { asm("wfi") } +} +``` + +## Status + +Aelys is an experimental language and compiler project under active rewrite. It is not ready for use. + +``` +source → parser → semantic analysis → AIR (Aelys IR) → LLVM IR → native code +``` + +Parser and semantic analysis are partially implemented. The `nogc` checker rules are under active design. Codegen targets LLVM. + +Language semantics, the IR, and parts of the standard library are not stable. Open design questions include the GC strategy, panic behavior in `#![no_std]`, iterator design, the GC / `nogc` boundary rules, and static guarantees in freestanding mode. + +## Contributing + +Bug reports, design feedback, and discussion around language semantics and compiler behavior are the most useful contributions right now. The codebase changes quickly, so if you want to contribute code, open an issue first so the work can be aligned with the current direction. 
diff --git a/_build.ps1 b/_build.ps1 new file mode 100644 index 0000000..9b68e02 --- /dev/null +++ b/_build.ps1 @@ -0,0 +1,11 @@ +$vs = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath +$vc = "$vs\VC\Auxiliary\Build\vcvars64.bat" +cmd /c ('"' + $vc + '" >nul 2>&1 && set') | ForEach-Object { + if ($_ -match '^(.*?)=(.*)$') { + Set-Item -Path "Env:$($matches[1])" -Value $matches[2] + } +} +$env:LLVM_SYS_181_PREFIX = 'C:\llvm' +$env:PATH = "C:\llvm\bin;$env:PATH" +Set-Location 'C:\Users\admin\RustroverProjects\aelys_lang' +.\target\debug\aelys-cli.exe compile test.aelys diff --git a/aelys/Cargo.toml b/aelys/Cargo.toml index 16c5e4b..16741e3 100644 --- a/aelys/Cargo.toml +++ b/aelys/Cargo.toml @@ -5,24 +5,21 @@ edition = "2024" [dependencies] aelys-common = { path = "../common" } -aelys-syntax = { path = "../syntax" } -aelys-frontend = { path = "../frontend" } -aelys-sema = { path = "../sema" } -aelys-opt = { path = "../opt" } -aelys-bytecode = { path = "../bytecode" } -aelys-backend = { path = "../backend" } -aelys-runtime = { path = "../runtime" } -aelys-modules = { path = "../modules" } aelys-driver = { path = "../driver" } [dev-dependencies] tempfile = "3" aelys-opt = { path = "../opt" } aelys-air = { path = "../air" } -aelys-bytecode = { path = "../bytecode" } +aelys-codegen = { path = "../codegen" } +aelys-driver = { path = "../driver" } +aelys-frontend = { path = "../frontend" } +aelys-sema = { path = "../sema" } +aelys-syntax = { path = "../syntax" } +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", default-features = false, features = ["llvm18-1", "target-x86"] } [build-dependencies] regex = "1" [lib] -doctest = false \ No newline at end of file +doctest = false diff --git a/aelys/src/api/call.rs b/aelys/src/api/call.rs deleted file mode 100644 index 50e4baa..0000000 --- a/aelys/src/api/call.rs +++ 
/dev/null @@ -1 +0,0 @@ -pub use aelys_driver::{CallableFunction, call_function, get_function}; diff --git a/aelys/src/api/mod.rs b/aelys/src/api/mod.rs index 25f7506..fcc44e6 100644 --- a/aelys/src/api/mod.rs +++ b/aelys/src/api/mod.rs @@ -1,11 +1,2 @@ -mod call; -mod repl; -mod run; -mod vm; - -pub use call::{CallableFunction, call_function, get_function}; -pub use repl::{run_with_vm, run_with_vm_and_opt}; -pub use run::{run, run_source, run_with_config, run_with_config_and_opt}; -pub use vm::{new_vm, new_vm_with_config}; - -pub use aelys_runtime::{VM, Value}; +// Re-export LLVM compilation API only +pub use aelys_driver::{compile_file_with_llvm, compile_to_typed_ast, lower_file_to_air}; diff --git a/aelys/src/api/repl.rs b/aelys/src/api/repl.rs deleted file mode 100644 index 7c5fe6f..0000000 --- a/aelys/src/api/repl.rs +++ /dev/null @@ -1 +0,0 @@ -pub use aelys_driver::{run_with_vm, run_with_vm_and_opt}; diff --git a/aelys/src/api/run.rs b/aelys/src/api/run.rs deleted file mode 100644 index 826d9a1..0000000 --- a/aelys/src/api/run.rs +++ /dev/null @@ -1 +0,0 @@ -pub use aelys_driver::{run, run_source, run_with_config, run_with_config_and_opt}; diff --git a/aelys/src/api/vm.rs b/aelys/src/api/vm.rs deleted file mode 100644 index 8d6c369..0000000 --- a/aelys/src/api/vm.rs +++ /dev/null @@ -1 +0,0 @@ -pub use aelys_driver::{new_vm, new_vm_with_config}; diff --git a/aelys/src/lib.rs b/aelys/src/lib.rs index 4d3ee82..29dc1bd 100644 --- a/aelys/src/lib.rs +++ b/aelys/src/lib.rs @@ -1,19 +1,6 @@ // public facade, re-exports from aelys-driver -// TODO: Test native module bundling and loading on different platforms -// TODO: (CLI) better REPL -// TODO: (VM) Implement JIT compilation for hot functions -// TODO: (VM) Add support for coroutines or async functions -// TODO: (VM) Implement better garbage collection (e.g., Arena GC) -// TODO: (VM) Way better FFI/we should be able to directly import .h files -// TODO: (Lexer/Parser) Revise token definitions and syntax for 
better clarity -// TODO: (Code Quality) consider Rc> for globals/global_indices in compiler -// TODO: (Code Quality) clean up those unsufferable functions in the compiler -// TODO: (Optimization) Improve constant folding to handle more complex expressions -// TODO: (Optimization) Tail call optimization -// TODO: (Optimization) Preallocated register pools -// TODO: (Optimization) Implement loop unrolling optimization pass -// TODO: (Optimization) Implement function inlining optimization pass (and @inline decorator) -// TODO: Custom modules share VM's global namespace, risk of collision if two modules export the same symbol (efor example mod_a::shared overwrites mod_b::shared as both become just shared in VM) + +// TODO: move the giant aelys/tests/* in their dedicated tests/ crates pub mod api; pub use api::*; diff --git a/aelys/tests/aasm_roundtrip_tests.rs b/aelys/tests/aasm_roundtrip_tests.rs deleted file mode 100644 index fe981a8..0000000 --- a/aelys/tests/aasm_roundtrip_tests.rs +++ /dev/null @@ -1,298 +0,0 @@ -use aelys_backend::Compiler; -use aelys_bytecode::asm::{assemble, deserialize, disassemble, serialize}; -use aelys_frontend::lexer::Lexer; -use aelys_frontend::parser::Parser; -use aelys_runtime::Heap; -use aelys_runtime::{VM, Value}; -use aelys_syntax::Source; - -/// Run source code directly -fn run_source(source: &str) -> Value { - aelys::run(source, "").expect("Execution failed") -} - -/// Helper to compile source to bytecode and heap -fn compile_source(source: &str) -> (aelys_runtime::Function, Heap) { - let src = Source::new("", source); - let tokens = Lexer::with_source(src.clone()) - .scan() - .expect("Lexer failed"); - let stmts = Parser::new(tokens, src.clone()) - .parse() - .expect("Parser failed"); - - // Use typed compilation pipeline - let typed_program = aelys_sema::TypeInference::infer_program(stmts, src.clone()) - .expect("Type inference failed"); - let (func, heap, _globals) = Compiler::new(None, src) - .compile_typed(&typed_program) - 
.expect("Compiler failed"); - - (func, heap) -} - -/// Run a function with a fresh heap (for roundtrip testing) -fn run_function_with_heap(mut func: aelys_runtime::Function, mut heap: Heap) -> Value { - let src = Source::new("", ""); - let mut vm = VM::new(src).unwrap(); - - let remap = vm.merge_heap(&mut heap).unwrap(); - func.remap_constants(&remap); - - let func_ref = vm.alloc_function(func).unwrap(); - vm.execute(func_ref).expect("Execution failed") -} - -#[test] -fn test_asm_roundtrip_simple() { - let source = "42"; - let result_direct = run_source(source); - - // Roundtrip through .aasm - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (functions, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - let result_roundtrip = run_function_with_heap(functions.into_iter().next().unwrap(), asm_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); -} - -#[test] -fn test_bytecode_asm_roundtrip_api() { - let src = r#" -.function 0 - .name "main" - .arity 0 - .registers 0 - - .code - 0000: Return0 -"#; - let (functions, heap) = aelys_bytecode::asm::assemble(src).expect("Assemble failed"); - let text = aelys_bytecode::asm::disassemble(&functions[0], Some(&heap)); - assert!(text.contains(".function 0")); -} - -#[test] -fn test_asm_roundtrip_arithmetic() { - let source = "(10 + 5) * 2 - 3"; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (functions, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - let result_roundtrip = run_function_with_heap(functions.into_iter().next().unwrap(), asm_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); - assert_eq!(result_direct.as_int(), Some(27)); -} - -#[test] -fn test_asm_roundtrip_conditionals() { - // Simple conditional expression (not if-else statement) - // Use ternary-style: the last expression is returned - let source = "10 
> 5"; // Returns true/false - simpler test for jumps - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (functions, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - let result_roundtrip = run_function_with_heap(functions.into_iter().next().unwrap(), asm_heap); - - assert_eq!(result_direct.as_bool(), result_roundtrip.as_bool()); - assert_eq!(result_direct.as_bool(), Some(true)); -} - -#[test] -fn test_asm_roundtrip_while_loop() { - let source = r#" - let mut sum = 0 - let mut i = 1 - while i <= 5 { - sum += i - i++ - } - sum - "#; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (functions, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - let result_roundtrip = run_function_with_heap(functions.into_iter().next().unwrap(), asm_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); - assert_eq!(result_direct.as_int(), Some(15)); // 1+2+3+4+5 -} - -#[test] -fn test_binary_roundtrip_simple() { - let source = "42"; - let result_direct = run_source(source); - - // Roundtrip through .avbc - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); -} - -#[test] -fn test_binary_roundtrip_with_strings() { - // Note: We can't easily capture print output, so we just verify the roundtrip works - let source = r#" - let x = "hello" - 42 - "#; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = 
run_function_with_heap(loaded_func, loaded_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); -} - -#[test] -fn test_binary_roundtrip_with_floats() { - let source = "3.14159"; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - let direct_f = result_direct.as_float().expect("Expected float"); - let roundtrip_f = result_roundtrip.as_float().expect("Expected float"); - assert!((direct_f - roundtrip_f).abs() < 0.00001); -} - -#[test] -fn test_binary_roundtrip_conditionals() { - let source = r#" - let x = 10 - if x > 5 { x + 1 } else { x - 1 } - "#; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); -} - -#[test] -fn test_double_roundtrip() { - // source -> bytecode -> .aasm -> bytecode -> .avbc -> bytecode - let source = r#" - let x = 10 - let y = 20 - x + y - "#; - let result_direct = run_source(source); - - // First roundtrip: through .aasm - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (asm_funcs, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - - // Second roundtrip: through .avbc - let bytes = serialize(&asm_funcs[0], &asm_heap); - let (final_func, final_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_final = run_function_with_heap(final_func, final_heap); - - assert_eq!(result_direct.as_int(), result_final.as_int()); - assert_eq!(result_direct.as_int(), Some(30)); -} - -#[test] -fn test_empty_function() { - let func = 
aelys_runtime::Function::new(Some("empty".to_string()), 0); - let heap = Heap::new(); - - // Test disassemble - let asm_text = disassemble(&func, Some(&heap)); - assert!(asm_text.contains(".function 0")); - assert!(asm_text.contains(".name \"empty\"")); - - // Test binary roundtrip - let bytes = serialize(&func, &heap); - let (loaded, _) = deserialize(&bytes).expect("Deserialize failed"); - assert_eq!(loaded.name, Some("empty".to_string())); -} - -#[test] -fn test_negative_numbers() { - let source = "-42"; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let asm_text = disassemble(&func, Some(&heap)); - let (functions, asm_heap) = assemble(&asm_text).expect("Assemble failed"); - let result_roundtrip = run_function_with_heap(functions.into_iter().next().unwrap(), asm_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); - assert_eq!(result_direct.as_int(), Some(-42)); -} - -#[test] -fn test_large_numbers() { - let source = "123456789"; - let result_direct = run_source(source); - - // Binary roundtrip - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - assert_eq!(result_direct.as_int(), result_roundtrip.as_int()); -} - -#[test] -fn test_booleans() { - let source = "true"; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); - let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - assert_eq!(result_direct.as_bool(), result_roundtrip.as_bool()); - assert_eq!(result_direct.as_bool(), Some(true)); -} - -#[test] -fn test_null() { - let source = "null"; - let result_direct = run_source(source); - - let (func, heap) = compile_source(source); 
- let bytes = serialize(&func, &heap); - let (loaded_func, loaded_heap) = deserialize(&bytes).expect("Deserialize failed"); - let result_roundtrip = run_function_with_heap(loaded_func, loaded_heap); - - assert!(result_direct.is_null()); - assert!(result_roundtrip.is_null()); -} - -#[test] -fn test_string_escaping() { - // Test that strings with special characters survive roundtrip - let source = r#""hello\nworld""#; - let (func, heap) = compile_source(source); - - // Just verify it doesn't crash - let asm_text = disassemble(&func, Some(&heap)); - assert!(asm_text.contains("\\n")); // Should be escaped - - let bytes = serialize(&func, &heap); - let (_, _) = deserialize(&bytes).expect("Deserialize failed"); -} diff --git a/aelys/tests/aasm_unit_tests.rs b/aelys/tests/aasm_unit_tests.rs deleted file mode 100644 index 8a42219..0000000 --- a/aelys/tests/aasm_unit_tests.rs +++ /dev/null @@ -1,102 +0,0 @@ -use aelys_bytecode::asm::disasm::escape_string; -use aelys_bytecode::asm::{assemble, deserialize, disassemble, serialize}; -use aelys_runtime::{Function, Heap, Value}; - -#[test] -fn test_escape_string() { - assert_eq!(escape_string("hello"), "hello"); - assert_eq!(escape_string("hello\nworld"), "hello\\nworld"); - assert_eq!(escape_string("tab\there"), "tab\\there"); - assert_eq!(escape_string("quote\"here"), "quote\\\"here"); - assert_eq!(escape_string("back\\slash"), "back\\\\slash"); -} - -#[test] -fn test_disassemble_empty_function() { - let func = Function::new(Some("test".to_string()), 0); - let output = disassemble(&func, None); - assert!(output.contains(".function 0")); - assert!(output.contains(".name \"test\"")); - assert!(output.contains(".arity 0")); -} - -#[test] -fn test_basic_assembly() { - let source = r#" -.version 1 - -.function 0 - .name "main" - .arity 0 - .registers 2 - - .code - 0000: LoadI r0, 42 - 0001: Print r0 - 0002: Return0 -"#; - let (functions, _heap) = assemble(source).unwrap(); - assert_eq!(functions.len(), 1); - 
assert_eq!(functions[0].name, Some("main".to_string())); - assert_eq!(functions[0].arity, 0); - assert_eq!(functions[0].bytecode.len(), 3); -} - -#[test] -fn test_label_resolution() { - let source = r#" -.function 0 - .arity 0 - .registers 1 - - .code - 0000: JumpIfNot r0, L0 - 0001: LoadI r0, 1 - 0002: Jump L1 - L0: - 0003: LoadI r0, 0 - L1: - 0004: Return r0 -"#; - let (functions, _heap) = assemble(source).unwrap(); - assert_eq!(functions[0].bytecode.len(), 5); -} - -#[test] -fn test_binary_basic_roundtrip() { - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 2; - func.set_bytecode(vec![0x01_00_00_2A, 0x22_00_00_00]); // LoadI r0, 42; Print r0 - - let heap = Heap::new(); - let bytes = serialize(&func, &heap); - - assert_eq!(&bytes[0..4], b"VBXQ"); - - let (loaded, _) = deserialize(&bytes).unwrap(); - assert_eq!(loaded.name, Some("test".to_string())); - assert_eq!(loaded.arity, 0); - assert_eq!(loaded.num_registers, 2); - assert_eq!(loaded.bytecode, func.bytecode); -} - -#[test] -fn test_binary_with_constants() { - let mut func = Function::new(None, 0); - func.constants = vec![ - Value::int(42), - Value::float(2.72), - Value::bool(true), - Value::null(), - ]; - - let heap = Heap::new(); - let bytes = serialize(&func, &heap); - let (loaded, _) = deserialize(&bytes).unwrap(); - - assert_eq!(loaded.constants.len(), 4); - assert_eq!(loaded.constants[0].as_int(), Some(42)); - assert!((loaded.constants[1].as_float().unwrap() - 2.72).abs() < 0.001); - assert_eq!(loaded.constants[2].as_bool(), Some(true)); - assert!(loaded.constants[3].is_null()); -} diff --git a/aelys/tests/adversarial_tests.rs b/aelys/tests/adversarial_tests.rs deleted file mode 100644 index ba06f44..0000000 --- a/aelys/tests/adversarial_tests.rs +++ /dev/null @@ -1,428 +0,0 @@ -mod common; -use aelys_runtime::{Function, OpCode, VM, Value}; -use aelys_syntax::Source; -use common::*; - -// Bytecode verifier bypass attempts - -#[test] -fn 
malicious_bytecode_invalid_opcode() { - let mut vm = VM::new(Source::new("test.aelys", "")).unwrap(); - let mut func = Function::new(Some("malicious".to_string()), 0); - func.num_registers = 1; - - // Try to inject invalid opcode - func.set_bytecode(vec![0xDEADBEEF]); - - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - assert!(result.is_err()); -} - -#[test] -fn bytecode_constant_pool_oob() { - let mut vm = VM::new(Source::new("test.aelys", "")).unwrap(); - let mut func = Function::new(Some("const_oob".to_string()), 0); - func.num_registers = 1; - func.constants.push(Value::int(42)); - - // Try to access constant index 5 when only 1 exists - func.emit_a(OpCode::LoadK, 0, 5, 0, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - assert!(result.is_err()); -} - -#[test] -fn bytecode_jump_to_invalid_address() { - let mut vm = VM::new(Source::new("test.aelys", "")).unwrap(); - let mut func = Function::new(Some("bad_jump".to_string()), 0); - func.num_registers = 1; - - // Jump way beyond bytecode - func.emit_b(OpCode::Jump, 0, 1000, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - // Should error when trying to execute beyond bytecode - assert!(result.is_err()); -} - -#[test] -fn bytecode_negative_jump() { - let mut vm = VM::new(Source::new("test.aelys", "")).unwrap(); - let mut func = Function::new(Some("neg_jump".to_string()), 0); - func.num_registers = 1; - - func.emit_b(OpCode::Jump, 0, -100, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - assert!(result.is_err()); -} - -// Path traversal attacks -// (network_recv_buffer_overflow → 
stdlib_net_tests::recv_bytes_exceeds_max_buffer) -// (path_traversal_dotdot → security_audit_tests::fs_join_rejects_parent_escape) -// (path_traversal_absolute → security_audit_tests::fs_join_rejects_absolute_path) - -#[test] -fn path_traversal_url_encoded() { - let code = r#" -needs std.fs -fs.join("/app", "..%2F..%2Fetc%2Fpasswd") -"#; - let err = run_aelys_err(code); - // Should still be caught - assert!(err.contains("capability") || err.contains("escapes")); -} - -// Type confusion attacks - -#[test] -fn type_confusion_null_as_int() { - let code = r#" -let x = null -x + 5 -"#; - let err = run_aelys_err(code); - assert!(err.contains("type") || err.contains("Type")); -} - -#[test] -fn type_confusion_string_as_number() { - let code = r#" -let x = "hello" -x * 2 -"#; - let err = run_aelys_err(code); - assert!(err.contains("type") || err.contains("Type")); -} - -#[test] -fn type_confusion_bool_arithmetic() { - let code = r#" -let x = true -x + 10 -"#; - let err = run_aelys_err(code); - assert!(err.contains("type") || err.contains("Type")); -} - -// Resource exhaustion attacks - -#[test] -#[ignore] -fn infinite_string_concat_oom() { - let code = r#" -let mut s = "x" -let mut i = 0 -while i < 100000 { - s = s + s - i++ -} -42 -"#; - // This might OOM or hit limits, either is fine - let result = run_aelys_result(code); - // If it completes, that's also OK (we have limits) - let _ = result; -} - -#[test] -#[ignore] -fn deep_recursion_stack_overflow() { - let code = r#" -fn recurse(n) { - return recurse(n + 1) -} -recurse(0) -"#; - let err = run_aelys_err(code); - // Should hit MAX_FRAMES limit - assert!( - err.contains("stack") - || err.contains("frame") - || err.contains("recursion") - || err.contains("Stack") - ); -} - -#[test] -#[ignore] -fn allocation_bomb() { - let code = r#" -let mut i = 0 -while i < 100000 { - let p = alloc(1000) - i++ -} -42 -"#; - // Might complete or OOM, either is acceptable - let _ = run_aelys_result(code); -} - -// Command injection 
attempts - -#[test] -fn exec_shell_injection_attempt() { - let code = r#" -needs std.sys -sys.exec("echo hello; rm -rf /") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability")); -} - -#[test] -fn exec_args_should_prevent_injection() { - // exec_args doesn't use shell, so shell metacharacters should be literal - let code = r#" -needs std.sys -sys.exec_args("echo", "hello; rm -rf /") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability")); -} - -// Integer overflow/underflow - -#[test] -fn integer_overflow_checked() { - let code = r#" -let max = 140737488355327 -max + 1 -"#; - // This might overflow or be handled - let _ = run_aelys(code); -} - -#[test] -fn integer_multiply_overflow() { - let code = r#" -let big = 10000000000 -big * big -"#; - let _ = run_aelys(code); -} - -// Format string attacks - -#[test] -fn time_format_string_attack() { - let code = r#" -format("%n%n%n%n%n") -"#; - // Should not crash - let _ = run_aelys(code); -} - -// Binary deserialization attacks - -#[test] -fn binary_oversized_function_count() { - let mut bytes = Vec::new(); - bytes.extend_from_slice(b"VBXQ"); - bytes.extend_from_slice(&1u16.to_le_bytes()); - bytes.extend_from_slice(&0u16.to_le_bytes()); - bytes.extend_from_slice(&1u32.to_le_bytes()); - bytes.extend_from_slice(&0u32.to_le_bytes()); - - // Try to claim 1 million nested functions - bytes.extend_from_slice(&0u16.to_le_bytes()); // name len - bytes.push(0u8); // arity - bytes.push(0u8); // num_registers - bytes.extend_from_slice(&0u16.to_le_bytes()); // constants - bytes.extend_from_slice(&1u32.to_le_bytes()); // bytecode length - bytes.extend_from_slice(&0u32.to_le_bytes()); // Return0 - bytes.extend_from_slice(&0u16.to_le_bytes()); // upvalues - bytes.extend_from_slice(&0u16.to_le_bytes()); // line numbers - bytes.extend_from_slice(&1000000u32.to_le_bytes()); // nested functions - - let result = aelys_bytecode::asm::deserialize(&bytes); - assert!(result.is_err()); -} - -#[test] -fn 
binary_oversized_constants() { - let mut bytes = Vec::new(); - bytes.extend_from_slice(b"VBXQ"); - bytes.extend_from_slice(&1u16.to_le_bytes()); - bytes.extend_from_slice(&0u16.to_le_bytes()); - bytes.extend_from_slice(&1u32.to_le_bytes()); - bytes.extend_from_slice(&0u32.to_le_bytes()); - bytes.extend_from_slice(&0u16.to_le_bytes()); - bytes.push(0u8); - bytes.push(0u8); - bytes.extend_from_slice(&65535u16.to_le_bytes()); // Max u16 constants - - let result = aelys_bytecode::asm::deserialize(&bytes); - assert!(result.is_err()); -} - -// Unicode exploitation attempts - -#[test] -fn unicode_bidi_override_attack() { - // Right-to-left override can hide malicious code - // Test that the code runs safely without issues - let code = r#" -let safe = "hello" -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn unicode_homoglyph_attack() { - // Using lookalike characters (Cyrillic vs Latin) - let code = r#" -let х = 42 -х -"#; - // х is Cyrillic, should work fine - assert_aelys_int(code, 42); -} - -// Division by zero variations - -#[test] -fn division_by_zero_direct() { - let code = "10 / 0"; - assert_aelys_error_contains(code, "division"); -} - -#[test] -fn division_by_zero_variable() { - let code = r#" -let x = 0 -10 / x -"#; - assert_aelys_error_contains(code, "division"); -} - -#[test] -fn modulo_by_zero() { - let code = "10 % 0"; - assert_aelys_error_contains(code, "division"); -} - -// Memory safety - -#[test] -fn use_after_free_manual_heap() { - let code = r#" -let p = alloc(10) -store(p, 0, 42) -free(p) -load(p, 0) -"#; - assert_aelys_error_contains(code, "freed"); -} - -#[test] -fn double_free_manual_heap() { - let code = r#" -let p = alloc(10) -free(p) -free(p) -"#; - assert_aelys_error_contains(code, "free"); -} - -#[test] -fn manual_heap_buffer_overflow() { - let code = r#" -let p = alloc(5) -load(p, 100) -"#; - assert_aelys_error_contains(code, "bound"); -} - -#[test] -fn manual_heap_negative_offset() { - let code = r#" -let p = alloc(10) -load(p, -5) 
-"#; - assert_aelys_error_contains(code, "negative"); -} - -// GC exploitation - -#[test] -fn gc_collection_during_critical_section() { - let code = r#" -let mut i = 0 -while i < 5000 { - let s1 = "string" + " concatenation" - let s2 = "more " + "strings" - let s3 = s1 + s2 - i++ -} -42 -"#; - assert_aelys_int(code, 42); -} - -// capability bypass attempts - -#[test] -fn fs_access_without_capability() { - let code = r#" -needs std.fs -fs.read_text("/etc/passwd") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn net_access_without_capability() { - // Net requires capability for all network operations - let code = r#" -needs std.net -net.connect("www.google.com", 80) -"#; - // Network operations require capability - let err = run_aelys_err(code); - assert!(err.contains("capability") || err.contains("Capability")); -} - -// (exec_without_capability → stdlib_sys_tests::sys_exec_denied_without_capability) - -// Prototype pollution attempts (not applicable, but test object safety) - -#[test] -fn function_modification_attempt() { - let code = r#" -fn test() { return 42 } -let result = test() -result -"#; - assert_aelys_int(code, 42); -} - -// Null pointer dereference - -#[test] -fn null_function_call() { - let code = r#" -let f = null -f() -"#; - let err = run_aelys_err(code); - assert!(err.contains("not callable") || err.contains("type") || err.contains("Type")); -} diff --git a/aelys/tests/air_layout_tests.rs b/aelys/tests/air_layout_tests.rs index 97854a6..ee115ee 100644 --- a/aelys/tests/air_layout_tests.rs +++ b/aelys/tests/air_layout_tests.rs @@ -1,5 +1,5 @@ use aelys_air::{ - AirProgram, AirStructDef, AirStructField, AirType, CallingConv, + AirEnumDef, AirEnumVariant, AirProgram, AirStructDef, AirStructField, AirType, CallingConv, layout::{compute_layouts, layout_of}, }; @@ -25,9 +25,40 @@ fn program(structs: Vec) -> AirProgram { AirProgram { functions: vec![], structs, + enums: vec![], globals: vec![], source_files: vec![], mono_instances: 
vec![], + struct_sizes: std::collections::HashMap::new(), + } +} + +fn enum_def(name: &str, variants: Vec) -> AirEnumDef { + AirEnumDef { + name: name.to_string(), + type_params: vec![], + variants, + span: None, + } +} + +fn variant(name: &str, tag: u32, payload: Vec) -> AirEnumVariant { + AirEnumVariant { + name: name.to_string(), + tag, + payload, + } +} + +fn program_with_enums(structs: Vec, enums: Vec) -> AirProgram { + AirProgram { + functions: vec![], + structs, + enums, + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), } } @@ -38,7 +69,7 @@ fn primitives() { assert_eq!(layout_of(&AirType::I32).size, 4); assert_eq!(layout_of(&AirType::F64).size, 8); assert_eq!(layout_of(&AirType::Bool).size, 1); - assert_eq!(layout_of(&AirType::Str).size, 8); + assert_eq!(layout_of(&AirType::Str).size, 16); assert_eq!(layout_of(&AirType::Void).size, 0); assert_eq!(layout_of(&AirType::Void).align, 1); assert_eq!(layout_of(&AirType::Slice(Box::new(AirType::I32))).size, 16); @@ -49,7 +80,8 @@ fn primitives() { ret: Box::new(AirType::Void), conv: CallingConv::Aelys, }; - assert_eq!(layout_of(&fnptr).size, 8); + // Aelys closure values are fat pointers { fn_ptr, env_ptr } = 16 bytes. 
+ assert_eq!(layout_of(&fnptr).size, 16); } #[test] @@ -206,6 +238,33 @@ fn array_of_struct_field() { assert_eq!(prog.structs[1].fields[1].offset, Some(4)); } +#[test] +fn enum_struct_dependency_chain_resolves() { + let mut prog = program_with_enums( + vec![ + sdef("Payload", vec![field("x", AirType::I64)]), + sdef( + "Wrapper", + vec![ + field("e", AirType::Enum("Message".into())), + field("flag", AirType::I8), + ], + ), + ], + vec![enum_def( + "Message", + vec![ + variant("None", 0, vec![]), + variant("Some", 1, vec![AirType::Struct("Payload".into())]), + ], + )], + ); + compute_layouts(&mut prog); + assert_eq!(prog.structs[0].fields[0].offset, Some(0)); + assert_eq!(prog.structs[1].fields[0].offset, Some(0)); + assert_eq!(prog.structs[1].fields[1].offset, Some(16)); +} + // Closure env with mixed captures: i64@0, bool@8, str@16 #[test] fn closure_env() { @@ -227,21 +286,41 @@ fn closure_env() { } #[test] -#[should_panic(expected = "infinite size")] -fn self_reference_panics() { +fn self_reference_reports_error() { let mut prog = program(vec![sdef( "Bad", vec![field("inner", AirType::Struct("Bad".into()))], )]); - compute_layouts(&mut prog); + let errors = compute_layouts(&mut prog); + assert!(!errors.is_empty(), "expected error for self-referencing struct"); + assert!(errors[0].contains("infinite size")); } #[test] -#[should_panic(expected = "recursive struct cycle")] -fn mutual_cycle_panics() { +fn mutual_cycle_reports_error() { let mut prog = program(vec![ sdef("A", vec![field("b", AirType::Struct("B".into()))]), sdef("B", vec![field("a", AirType::Struct("A".into()))]), ]); - compute_layouts(&mut prog); + let errors = compute_layouts(&mut prog); + assert!(!errors.is_empty(), "expected error for mutual cycle"); +} + +#[test] +fn struct_enum_cycle_reports_error() { + let mut prog = program_with_enums( + vec![sdef( + "Node", + vec![field("next", AirType::Enum("OptionNode".into()))], + )], + vec![enum_def( + "OptionNode", + vec![ + variant("Some", 0, 
vec![AirType::Struct("Node".into())]), + variant("None", 1, vec![]), + ], + )], + ); + let errors = compute_layouts(&mut prog); + assert!(!errors.is_empty(), "expected error for struct-enum cycle"); } diff --git a/aelys/tests/air_lower_tests.rs b/aelys/tests/air_lower_tests.rs index 9028e42..6c91d77 100644 --- a/aelys/tests/air_lower_tests.rs +++ b/aelys/tests/air_lower_tests.rs @@ -183,3 +183,249 @@ fn float_literal_defaults_to_f64() { Some(AirConst::Float(_, AirFloatSize::F64)) )); } + +#[test] +fn top_level_global_read_uses_global_get_instead_of_closure_env() { + let air = lower_source( + r#" +let g = 7 + +fn main() -> i64 { + return g +} +"#, + ); + + let main = func(&air, "main"); + assert!(main.params.is_empty(), "main should not get a hidden env param"); + assert!( + !air.structs.iter().any(|s| s.name == "__closure_env_main"), + "top-level globals must not synthesize a closure env" + ); + assert!( + has_named_call(main, "__aelys_global_get_g"), + "global reads should lower to the synthetic global getter call" + ); +} + +#[test] +fn top_level_simple_enum_global_lowers_to_const_tag() { + let air = lower_source( + r#" +enum Color { + Red, + Green, +} + +let c: Color = Color::Green +"#, + ); + + let global = air + .globals + .iter() + .find(|g| g.name == "c") + .expect("global 'c' not found"); + assert!(matches!(global.init, Some(AirConst::Int(1, AirIntSize::I32)))); +} + +#[test] +fn top_level_data_enum_unit_global_lowers_to_const_tag() { + let air = lower_source( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::None +"#, + ); + + let global = air + .globals + .iter() + .find(|g| g.name == "g") + .expect("global 'g' not found"); + assert!(matches!(global.init, Some(AirConst::Int(1, AirIntSize::I32)))); +} + +#[test] +fn top_level_data_enum_payload_global_lowers_to_const_enum() { + let air = lower_source( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::Some(42) +"#, + ); + + let global = air + .globals + .iter() + 
.find(|g| g.name == "g") + .expect("global 'g' not found"); + assert!(matches!( + global.init, + Some(AirConst::Enum { ref enum_name, tag: 0, ref payload }) + if enum_name == "__mono_Option_i64" + && matches!(payload.as_slice(), [AirConst::Int(42, AirIntSize::I64)]) + )); +} + +#[test] +fn top_level_fnptr_global_alias_lowers_to_target_fnref() { + let air = lower_source( + r#" +let g: fn() -> i64 = main +let h: fn() -> i64 = g + +fn main() -> i64 { + return h() +} +"#, + ); + + let global = air + .globals + .iter() + .find(|g| g.name == "h") + .expect("global 'h' not found"); + assert!(matches!( + global.init, + Some(AirConst::FnRef(ref name)) if name == "main" + )); +} + +#[test] +fn top_level_data_enum_global_alias_clones_const_initializer() { + let air = lower_source( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::None +let h: Option = g +"#, + ); + + let global = air + .globals + .iter() + .find(|g| g.name == "h") + .expect("global 'h' not found"); + assert!(matches!(global.init, Some(AirConst::Int(1, AirIntSize::I32)))); +} + +// regression test: Null -> Ptr(Void) + +#[test] +fn null_literal_lowers_to_ptr_void_not_bare_void() { + // InferType::Null was lowered to AirType::Void, causing 0-byte allocations + // It should be AirType::Ptr(Box::new(AirType::Void)) + let air = lower_source( + r#" +fn make_null() { + let x = null +} +"#, + ); + let f = func(&air, "make_null"); + let null_local = f + .locals + .iter() + .find(|l| l.name.as_deref() == Some("x")) + .expect("local 'x' not found"); + assert_eq!( + null_local.ty, + AirType::Ptr(Box::new(AirType::Void)), + "null literal should produce Ptr(Void), not bare Void" + ); +} + +#[test] +fn null_literal_type_is_not_void() { + // ensure the local for a null-typed variable is not AirType::Void + let air = lower_source( + r#" +fn test_null() { + let y = null +} +"#, + ); + let f = func(&air, "test_null"); + let local = f + .locals + .iter() + .find(|l| l.name.as_deref() == Some("y")) + 
.expect("local 'y' not found"); + assert_ne!( + local.ty, + AirType::Void, + "null-typed local must not be bare Void (would cause 0-byte alloca)" + ); +} + +// array size validation +// verify that invalid user code produces a descriptive compile error via the lowering_errors mechanism instead of a raw Rust panic + +#[test] +#[should_panic(expected = "AIR lowering failed")] +fn non_constant_array_size_in_expr_produces_error() { + // [fill; n] where n is a variable should produce a clean compile error + lower_source( + r#" +fn f(n: i64) -> i64 { + let arr = [0; n] + return 0 +} +"#, + ); +} + +#[test] +#[should_panic(expected = "non-constant array size")] +fn non_constant_array_size_error_message_is_descriptive() { + // verify the error message mentions what went wrong. + lower_source( + r#" +fn g(size: i64) -> i64 { + let arr = [42; size] + return 0 +} +"#, + ); +} + +#[test] +#[should_panic(expected = "stack array too large")] +fn oversized_stack_array_in_expr_produces_error() { + // a very large [fill; N] should produce a clean compile error + lower_source( + r#" +fn h() -> i64 { + let x = [0; 200000] + return x[0] +} +"#, + ); +} + +#[test] +#[should_panic(expected = "AIR lowering failed")] +fn oversized_array_error_aggregated_in_finish() { + // verify that the error is reported through the aggregated finish() mechanism. 
+ lower_source( + r#" +fn big() -> i64 { + let huge = [1; 300000] + return huge[0] +} +"#, + ); +} diff --git a/aelys/tests/air_print_tests.rs b/aelys/tests/air_print_tests.rs index 2d13e88..ef1f35f 100644 --- a/aelys/tests/air_print_tests.rs +++ b/aelys/tests/air_print_tests.rs @@ -5,9 +5,11 @@ fn empty_program() -> AirProgram { AirProgram { functions: vec![], structs: vec![], + enums: vec![], globals: vec![], source_files: vec![], mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), } } diff --git a/aelys/tests/air_validation_tests.rs b/aelys/tests/air_validation_tests.rs index e9a24dc..b840007 100644 --- a/aelys/tests/air_validation_tests.rs +++ b/aelys/tests/air_validation_tests.rs @@ -1,6 +1,7 @@ use aelys_air::layout::compute_layouts; use aelys_air::lower::{lower, lower_with_gc_mode}; use aelys_air::mono::monomorphize; +use aelys_air::passes::validate::{AirValidationDetail, validate_air}; use aelys_air::*; use aelys_frontend::lexer::Lexer; use aelys_frontend::parser::Parser; @@ -37,6 +38,15 @@ fn func<'a>(air: &'a AirProgram, name: &str) -> &'a AirFunction { .unwrap_or_else(|| panic!("function `{}` not found in AIR", name)) } +fn default_attribs() -> FunctionAttribs { + FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + } +} + #[test] fn stdlib_call_print() { let air = lower_with_globals( @@ -167,12 +177,12 @@ fn outer() { ); assert!( !field_names.contains(&"print"), - "closure env should NOT capture global `print`, got: {:?}", + "closure env should not capture global `print`, got: {:?}", field_names ); assert!( !field_names.contains(&"println"), - "closure env should NOT capture global `println`, got: {:?}", + "closure env should not capture global `println`, got: {:?}", field_names ); } @@ -324,7 +334,7 @@ fn caller() -> i32 { let mut program = air; compute_layouts(&mut program); - let program = monomorphize(program); + let program = monomorphize(program).unwrap(); let mono_fn = program 
.functions @@ -368,6 +378,60 @@ fn caller() -> i32 { !program.functions.iter().any(|f| f.name == "identity"), "original generic `identity` should be removed after monomorphization" ); + + // verify that monomorphization patched the caller's local type + // to match the monomorphized return type (was I64 placeholder from Dynamic). + let call_result_local = caller.blocks.iter().find_map(|b| { + b.stmts.iter().find_map(|s| match &s.kind { + AirStmtKind::Assign { + place: Place::Local(id), + rvalue: + Rvalue::Call { + func: Callee::Named(n), + .. + }, + } if n.contains("__mono_identity_i32") => Some(*id), + _ => None, + }) + }); + if let Some(local_id) = call_result_local { + let local_ty = caller + .locals + .iter() + .find(|l| l.id == local_id) + .map(|l| &l.ty); + assert_eq!( + local_ty, + Some(&AirType::I32), + "caller's temp for identity result should have type I32 after mono, not I64" + ); + } +} + +#[test] +fn generic_struct_decl_only_does_not_introduce_unresolved_air_params() { + let mut air = lower_source( + r#" +struct Box { value: T } +fn main() { +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + passes::dead_locals::eliminate_dead_locals(&mut air); + + let result = validate_air(&air); + assert!( + result.is_ok(), + "generic struct declaration without instantiation should not produce unresolved AIR params, errors: {:?}", + result.err() + ); + assert!( + !air.structs.iter().any(|s| s.name == "Box"), + "uninstantiated generic struct declarations should not be lowered to AIR structs" + ); } #[test] @@ -418,3 +482,1425 @@ fn some_func() { "file-level Manual gc mode should propagate to functions" ); } + +#[test] +fn copy_elim_removes_param_to_local_copies() { + let mut air = lower_source( + r#" +fn align_probe(x: i64, y: i32, z: i16, w: i8, b: bool, f: f32, d: f64, p: string) -> i64 { + let a64: i64 = x + let a32: i32 = y + let a16: i16 = z + let a8: i8 = w + let ab: bool = b + 
let af32: f32 = f + let af64: f64 = d + let sp: string = p + return a64 + (a32 as i64) + (a16 as i64) + (a8 as i64) + (ab as i64) + (af32 as i64) + (af64 as i64) +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + + let f = func(&air, "align_probe"); + let params: HashSet = f.params.iter().map(|p| p.id).collect(); + + let has_param_copy = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!(&s.kind, + AirStmtKind::Assign { + place: Place::Local(dst), + rvalue: Rvalue::Use(Operand::Copy(src) | Operand::Move(src)), + } + if params.contains(src) && !params.contains(dst) + ) + }) + }); + assert!( + !has_param_copy, + "copy elimination should remove param-to-local copies" + ); +} + +#[test] +fn copy_elim_keeps_reassigned_param_copy() { + let mut air = lower_source( + r#" +fn keep_copy(x: i64) -> i64 { + let mut y: i64 = x + y = y + 1 + return y +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + aelys_air::passes::copy_elim::eliminate_copies(&mut air); + + let f = func(&air, "keep_copy"); + let params: HashSet = f.params.iter().map(|p| p.id).collect(); + + let has_param_copy = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!(&s.kind, + AirStmtKind::Assign { + place: Place::Local(dst), + rvalue: Rvalue::Use(Operand::Copy(src) | Operand::Move(src)), + } + if params.contains(src) && !params.contains(dst) + ) + }) + }); + assert!( + has_param_copy, + "copy should remain when destination local is reassigned" + ); +} + +#[test] +fn stdlib_call_println_uses_str_operand() { + let air = lower_with_globals( + r#" +fn main() { + println("Hello") +} +"#, + &["print", "println"], + ); + + let f = func(&air, "main"); + let arg = f + .blocks + .iter() + .flat_map(|b| b.stmts.iter()) + .find_map(|s| match &s.kind { + AirStmtKind::CallVoid { + func: Callee::Named(n), + args, + } + | AirStmtKind::Assign { + rvalue: + Rvalue::Call { + func: 
Callee::Named(n), + args, + }, + .. + } if n == "println" => args.first(), + _ => None, + }) + .expect("expected a println call"); + + match arg { + Operand::Const(AirConst::Str(_)) => {} + Operand::Copy(id) | Operand::Move(id) => { + let ty = f + .params + .iter() + .find(|p| p.id == *id) + .map(|p| &p.ty) + .or_else(|| f.locals.iter().find(|l| l.id == *id).map(|l| &l.ty)) + .expect("println argument local should exist"); + assert_eq!(ty, &AirType::Str, "println argument must be `str`"); + } + _ => panic!("unexpected println arg operand kind"), + } +} + +#[test] +fn dead_locals_removes_align_probe_copy_targets() { + let mut air = lower_source( + r#" +fn align_probe(x: i64, y: i32, z: i16, w: i8, b: bool, f: f32, d: f64, p: string) -> i64 { + let a64: i64 = x + let a32: i32 = y + let a16: i16 = z + let a8: i8 = w + let ab: bool = b + let af32: f32 = f + let af64: f64 = d + let sp: string = p + return a64 + (a32 as i64) + (a16 as i64) + (a8 as i64) + (ab as i64) + (af32 as i64) + (af64 as i64) +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + passes::dead_locals::eliminate_dead_locals(&mut air); + + let f = func(&air, "align_probe"); + let local_ids: HashSet = f.locals.iter().map(|l| l.id.0).collect(); + for id in 8..=14 { + assert!( + !local_ids.contains(&id), + "local %{} should be removed by dead_locals", + id + ); + } +} + +#[test] +fn multi_instantiation_generic_dispatches_correctly() { + let air = lower_source( + r#" +fn identity(x: T) -> T { + return x +} +fn caller() -> i64 { + let a: i32 = identity(42 as i32) + let b: i64 = identity(100) + return (a as i64) + b +} +"#, + ); + + let mut program = air; + compute_layouts(&mut program); + let program = monomorphize(program).unwrap(); + + // Both instantiations should exist + let mono_i32 = program + .functions + .iter() + .find(|f| f.name.contains("__mono_identity_i32")); + let mono_i64 = program + .functions + .iter() + 
.find(|f| f.name.contains("__mono_identity_i64")); + assert!( + mono_i32.is_some(), + "expected __mono_identity_i32, found: {:?}", + program + .functions + .iter() + .map(|f| &f.name) + .collect::>() + ); + assert!( + mono_i64.is_some(), + "expected __mono_identity_i64, found: {:?}", + program + .functions + .iter() + .map(|f| &f.name) + .collect::>() + ); + + // verify the i32 instance has i32 param/return, and the i64 instance has i64 + let mono_i32 = mono_i32.unwrap(); + assert_eq!(mono_i32.params[0].ty, AirType::I32); + assert_eq!(mono_i32.ret_ty, AirType::I32); + let mono_i64 = mono_i64.unwrap(); + assert_eq!(mono_i64.params[0].ty, AirType::I64); + assert_eq!(mono_i64.ret_ty, AirType::I64); + + // verify call sites too are rewritten to the correct mangled names + let caller = func(&program, "caller"); + let call_targets: Vec = caller + .blocks + .iter() + .flat_map(|b| { + b.stmts + .iter() + .filter_map(|s| match &s.kind { + AirStmtKind::Assign { + rvalue: + Rvalue::Call { + func: Callee::Named(n), + .. + }, + .. + } => Some(n.clone()), + _ => None, + }) + .chain(match &b.terminator { + AirTerminator::Invoke { + func: Callee::Named(n), + .. 
+ } => Some(n.clone()), + _ => None, + }) + }) + .collect(); + + assert!( + call_targets + .iter() + .any(|n| n.contains("__mono_identity_i32")), + "caller should call __mono_identity_i32, found calls: {:?}", + call_targets + ); + assert!( + call_targets + .iter() + .any(|n| n.contains("__mono_identity_i64")), + "caller should call __mono_identity_i64, found calls: {:?}", + call_targets + ); +} + +// AIR validation pass tests + +/// helper: build a minimal valid AirProgram with one function +fn make_valid_program() -> AirProgram { + AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "test_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::I64, + name: Some("_ret".to_string()), + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(Some(Operand::Const(AirConst::Int( + 0, + AirIntSize::I64, + )))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + } +} + +#[test] +fn validate_rejects_void_local_non_return() { + let mut program = make_valid_program(); + // add a Void-typed local that is not the return position. 
+ program.functions[0].locals.push(AirLocal { + id: LocalId(1), + ty: AirType::Void, + name: Some("bad_local".to_string()), + is_mut: false, + span: None, + }); + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Void local" + ); + let errors = result.unwrap_err(); + assert_eq!( + errors.len(), + 1, + "expected exactly 1 error, got {}", + errors.len() + ); + assert!( + matches!( + &errors[0].detail, + AirValidationDetail::VoidLocal { local_id: 1, .. } + ), + "expected VoidLocal error for local %1, got: {:?}", + errors[0].detail + ); + assert_eq!(errors[0].function_name, "test_fn"); +} + +#[test] +fn validate_accepts_void_return_local_on_void_function() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "void_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::Void, + name: Some("_ret".to_string()), + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!( + result.is_ok(), + "void return local on void function should be valid" + ); +} + +#[test] +fn validate_rejects_void_param() { + let mut program = make_valid_program(); + program.functions[0].params.push(AirParam { + id: LocalId(10), + ty: AirType::Void, + name: "bad_param".to_string(), + span: None, + }); + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Void 
param" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::VoidLocal { local_id: 10, .. } + )), + "expected VoidLocal error for param %10, got: {:?}", + errors + ); +} + +#[test] +fn function_param_call_lowers_to_indirect_call() { + let air = lower_source( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { + return f(x) +} +"#, + ); + let f = func(&air, "apply"); + let has_indirect = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!( + &s.kind, + AirStmtKind::Assign { + rvalue: Rvalue::Call { + func: Callee::FnPtr(_), + .. + }, + .. + } + ) + }) + }); + assert!( + has_indirect, + "expected call through function parameter to lower as Callee::FnPtr" + ); +} + +#[test] +fn struct_fnptr_field_call_lowers_to_indirect_call() { + let air = lower_source( + r#" +struct Holder { + f: fn(i64) -> i64, +} + +fn inc(x: i64) -> i64 { + return x + 1 +} + +fn main() -> i64 { + let h = Holder { f: inc } + return h.f(41) +} +"#, + ); + let f = func(&air, "main"); + let has_bad_named_call = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!( + &s.kind, + AirStmtKind::Assign { + rvalue: Rvalue::Call { + func: Callee::Named(name), + .. + }, + .. + } if name == "h.f" + ) + }) + }); + let has_indirect = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!( + &s.kind, + AirStmtKind::Assign { + rvalue: Rvalue::Call { + func: Callee::FnPtr(_), + .. + }, + .. 
+ } + ) + }) + }); + assert!( + !has_bad_named_call, + "struct fnptr field call must not lower as direct named call" + ); + assert!( + has_indirect, + "expected struct fnptr field call to lower as Callee::FnPtr" + ); +} + +#[test] +fn function_identifier_as_value_lowers_to_fnref() { + let air = lower_source( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { + return f(x) +} +fn inc(x: i64) -> i64 { + return x + 1 +} +fn main() -> i64 { + return apply(inc, 5) +} +"#, + ); + let f = func(&air, "main"); + let has_closure_create = f.blocks.iter().any(|b| { + b.stmts.iter().any(|s| { + matches!( + &s.kind, + AirStmtKind::Assign { + rvalue: Rvalue::ClosureCreate { fn_name, .. }, + .. + } if fn_name == "inc" + ) + }) + }); + assert!( + has_closure_create, + "expected function identifier value to materialize from ClosureCreate(\"inc\")" + ); +} + +#[test] +fn validate_rejects_undeclared_block_reference() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "bad_block_ref".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + // References block bb99 which does not exist. + terminator: AirTerminator::Goto(BlockId(99)), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for undeclared block" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::UndeclaredBlock { block_id: 99, .. 
} + )), + "expected UndeclaredBlock error for bb99, got: {:?}", + errors + ); +} + +#[test] +fn validate_rejects_undeclared_local_reference() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "bad_local_ref".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + // returns a reference to local %42 which doesn't exist + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(42)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for undeclared local" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::UndeclaredLocal { local_id: 42, .. } + )), + "expected UndeclaredLocal error for %42, got: {:?}", + errors + ); +} + +#[test] +fn validate_accepts_valid_lowered_program() { + // real program lowered from source should pass validation. 
+ let air = lower_source( + r#" +fn add(a: i64, b: i64) -> i64 { + return a + b +} +"#, + ); + let result = validate_air(&air); + assert!( + result.is_ok(), + "valid lowered program should pass validation, errors: {:?}", + result.err() + ); +} + +#[test] +fn validate_accepts_valid_program_after_full_pipeline() { + // full pipeline: lower -> layouts -> mono -> copy_elim -> dead_locals -> validate + let mut air = lower_source( + r#" +fn identity(x: T) -> T { + return x +} +fn caller() -> i32 { + let v: i32 = 42 + return identity(v) +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + passes::dead_locals::eliminate_dead_locals(&mut air); + + let result = validate_air(&air); + assert!( + result.is_ok(), + "fully-pipelined program should pass validation, errors: {:?}", + result.err() + ); +} + +#[test] +fn validate_collects_multiple_errors() { + // a program with multiple violations should report all of them + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "multi_bad".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::Void, + name: Some("void1".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::Void, + name: Some("void2".to_string()), + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(Some(Operand::Const(AirConst::Int( + 0, + AirIntSize::I64, + )))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + 
source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!(result.is_err()); + let errors = result.unwrap_err(); + assert_eq!( + errors.len(), + 2, + "expected 2 VoidLocal errors (for %1 and %2), got {}", + errors.len() + ); +} + +#[test] +fn validate_skips_extern_functions() { + // Extern functions have no body and should not be validated. + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "extern_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "x".to_string(), + span: None, + }], + ret_ty: AirType::Void, + locals: vec![], + blocks: vec![], // empty body is OK for extern + is_extern: true, + calling_conv: CallingConv::C, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!( + result.is_ok(), + "extern functions should be skipped, errors: {:?}", + result.err() + ); +} + +// opaque type validation tests + +#[test] +fn validate_rejects_opaque_local() { + let mut program = make_valid_program(); + program.functions[0].locals.push(AirLocal { + id: LocalId(1), + ty: AirType::Opaque, + name: Some("unresolved_dynamic".to_string()), + is_mut: false, + span: None, + }); + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Opaque local" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::OpaqueType { local_id: 1, .. 
} + )), + "expected OpaqueType error for local %1, got: {:?}", + errors + ); +} + +#[test] +fn validate_rejects_opaque_param() { + let mut program = make_valid_program(); + program.functions[0].params.push(AirParam { + id: LocalId(10), + ty: AirType::Opaque, + name: "opaque_param".to_string(), + span: None, + }); + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Opaque param" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::OpaqueType { local_id: 10, .. } + )), + "expected OpaqueType error for param %10, got: {:?}", + errors + ); +} + +#[test] +fn validate_rejects_opaque_nested_in_array() { + let mut program = make_valid_program(); + program.functions[0].locals.push(AirLocal { + id: LocalId(2), + ty: AirType::Array(Box::new(AirType::Opaque), 5), + name: Some("opaque_array".to_string()), + is_mut: false, + span: None, + }); + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Opaque nested in Array" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::OpaqueType { local_id: 2, .. 
} + )), + "expected OpaqueType error for local %2, got: {:?}", + errors + ); +} + +#[test] +fn validate_rejects_opaque_struct_field() { + let program = AirProgram { + functions: vec![], + structs: vec![AirStructDef { + name: "BadStruct".to_string(), + type_params: vec![], + fields: vec![AirStructField { + name: "unresolved".to_string(), + ty: AirType::Opaque, + offset: Some(0), + }], + is_closure_env: false, + span: None, + }], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + assert!( + result.is_err(), + "expected validation to fail for Opaque struct field" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::OpaqueStructField { + struct_name, + field_name, + } if struct_name == "BadStruct" && field_name == "unresolved" + )), + "expected OpaqueStructField error, got: {:?}", + errors + ); +} + +#[test] +fn validate_opaque_does_not_appear_after_monomorphization() { + // a generic function's return type starts as Dynamic -> Opaque in AIR, + // but monomorphization should replace it with the concrete type + + // After the full pipeline, validation should pass + let mut air = lower_source( + r#" +fn identity(x: T) -> T { + return x +} +fn caller() -> i64 { + return identity(42) +} +"#, + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + passes::dead_locals::eliminate_dead_locals(&mut air); + + let result = validate_air(&air); + assert!( + result.is_ok(), + "monomorphized generic call should not have Opaque types, errors: {:?}", + result.err() + ); +} + +#[test] +fn validate_print_builtin_does_not_produce_opaque_local() { + // print/println returns Dynamic, but AIR lowering should emit CallVoid + // for their calls instead of creating an Opaque-typed temp local. 
+ let mut air = lower_with_globals( + r#" +fn main() { + println("hello world") +} +"#, + &["print", "println"], + ); + compute_layouts(&mut air); + let mut air = monomorphize(air).unwrap(); + passes::copy_elim::eliminate_copies(&mut air); + passes::dead_locals::eliminate_dead_locals(&mut air); + + let result = validate_air(&air); + assert!( + result.is_ok(), + "println call should not produce Opaque locals, errors: {:?}", + result.err() + ); + + // verify no locals have Opaque type. + let f = func(&air, "main"); + for local in &f.locals { + assert_ne!( + local.ty, + AirType::Opaque, + "local %{} should not have Opaque type after pipeline", + local.id.0 + ); + } +} + +// Tuple/Range -> Opaque (caught by validation) + +/// Simulates what happens when a Tuple type survives to AIR: +/// +/// the lowering now produces Opaque instead of Void. if such a local ever reaches the validation pass, it should be rejected with an OpaqueType error +/// here we constructs a synthetic AIR program with an Opaque local representing a Tuple or Range that leaked through sema) +#[test] +fn validate_rejects_opaque_from_tuple_or_range() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "tuple_leak".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::Opaque, + name: Some("leaked_tuple".to_string()), + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let result = validate_air(&program); + 
assert!( + result.is_err(), + "Opaque local (from Tuple/Range) should be rejected by validation" + ); + let errors = result.unwrap_err(); + assert!( + errors.iter().any(|e| matches!( + &e.detail, + AirValidationDetail::OpaqueType { + local_id: 0, + local_name: Some(name), + } if name == "leaked_tuple" + )), + "expected OpaqueType error for leaked_tuple, got: {:?}", + errors + ); +} + +/// verifies that null-typed locals pass validation (they are Ptr(Void), not bare Void, so they have a valid non-zero size) +#[test] +fn validate_accepts_null_typed_local() { + let air = lower_source( + r#" +fn use_null() { + let x = null +} +"#, + ); + let result = validate_air(&air); + assert!( + result.is_ok(), + "null-typed local (Ptr(Void)) should pass validation, errors: {:?}", + result.err() + ); +} + +#[test] +fn validate_rejects_ambiguous_generic_unit_variant_after_mono() { + let option_enum = AirEnumDef { + name: "Option".to_string(), + type_params: vec![TypeParamId(0)], + variants: vec![ + AirEnumVariant { + name: "Some".to_string(), + tag: 0, + payload: vec![AirType::Param(TypeParamId(0))], + }, + AirEnumVariant { + name: "None".to_string(), + tag: 1, + payload: vec![], + }, + ], + span: None, + }; + + let seed_i64 = AirFunction { + id: FunctionId(0), + name: "seed_i64".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Enum("__mono_Option_i64".to_string()), + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::Enum("__mono_Option_i64".to_string()), + name: Some("ret".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::Enum("__mono_Option_i64".to_string()), + name: Some("value".to_string()), + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::EnumInit { + enum_name: "Option".to_string(), + variant: "Some".to_string(), + tag: 0, + 
payload: vec![Operand::Const(AirConst::Int(1, AirIntSize::I64))], + }, + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(1)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }; + + let seed_str = AirFunction { + id: FunctionId(1), + name: "seed_str".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Enum("__mono_Option_str".to_string()), + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::Enum("__mono_Option_str".to_string()), + name: Some("ret".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::Enum("__mono_Option_str".to_string()), + name: Some("value".to_string()), + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::EnumInit { + enum_name: "Option".to_string(), + variant: "Some".to_string(), + tag: 0, + payload: vec![Operand::Const(AirConst::Str("hello".to_string()))], + }, + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(1)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }; + + let ambiguous_none = AirFunction { + id: FunctionId(2), + name: "ambiguous_none".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::Enum("Option".to_string()), + name: Some("ambiguous".to_string()), + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::EnumInit { + enum_name: "Option".to_string(), + variant: "None".to_string(), + tag: 1, + payload: vec![], + }, + }, + span: None, + }], + 
terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }; + + let program = AirProgram { + functions: vec![seed_i64, seed_str, ambiguous_none], + structs: vec![], + enums: vec![option_enum], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let errors = match monomorphize(program) { + Err(e) => e, + Ok(_) => panic!("ambiguous generic unit variant should be rejected during monomorphization"), + }; + assert!( + errors.iter().any(|e| e.contains("ambiguous unit variant")), + "expected ambiguous unit variant error, got: {:?}", + errors + ); +} + +#[test] +fn monomorphize_distinguishes_fnptr_calling_conventions_in_enum_type_args() { + let program = AirProgram { + functions: vec![ + AirFunction { + id: FunctionId(0), + name: "fast_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }, + AirFunction { + id: FunctionId(1), + name: "c_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }, + AirFunction { + id: FunctionId(2), + name: "seed".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::FnPtr { + params: vec![], + ret: Box::new(AirType::I64), + conv: CallingConv::Aelys, + }, + name: Some("fast".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::FnPtr { + params: vec![], + ret: Box::new(AirType::I64), + conv: CallingConv::C, + }, + name: 
Some("c".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::Enum("Holder".to_string()), + name: Some("aelys_holder".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(3), + ty: AirType::Enum("Holder".to_string()), + name: Some("c_holder".to_string()), + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Use(Operand::Const(AirConst::FnRef( + "fast_fn".to_string(), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Use(Operand::Const(AirConst::FnRef( + "c_fn".to_string(), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::EnumInit { + enum_name: "Holder".to_string(), + variant: "Value".to_string(), + tag: 0, + payload: vec![Operand::Copy(LocalId(0))], + }, + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(3)), + rvalue: Rvalue::EnumInit { + enum_name: "Holder".to_string(), + variant: "Value".to_string(), + tag: 0, + payload: vec![Operand::Copy(LocalId(1))], + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }, + ], + structs: vec![], + enums: vec![AirEnumDef { + name: "Holder".to_string(), + type_params: vec![TypeParamId(0)], + variants: vec![ + AirEnumVariant { + name: "Value".to_string(), + tag: 0, + payload: vec![AirType::Param(TypeParamId(0))], + }, + AirEnumVariant { + name: "Empty".to_string(), + tag: 1, + payload: vec![], + }, + ], + span: None, + }], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let air = monomorphize(program).unwrap(); + let 
holder_defs: Vec<_> = air + .enums + .iter() + .filter(|def| def.name.starts_with("__mono_Holder_")) + .collect(); + assert_eq!( + holder_defs.len(), + 2, + "distinct fnptr calling conventions must produce distinct Holder monos" + ); + assert!( + holder_defs.iter().any(|def| matches!( + &def.variants[0].payload[..], + [AirType::FnPtr { conv: CallingConv::Aelys, .. }] + )), + "missing Aelys fnptr instantiation: {:?}", + holder_defs.iter().map(|def| &def.name).collect::>() + ); + assert!( + holder_defs.iter().any(|def| matches!( + &def.variants[0].payload[..], + [AirType::FnPtr { conv: CallingConv::C, .. }] + )), + "missing C fnptr instantiation: {:?}", + holder_defs.iter().map(|def| &def.name).collect::>() + ); +} diff --git a/aelys/tests/allocation_guard_tests.rs b/aelys/tests/allocation_guard_tests.rs deleted file mode 100644 index ced1af2..0000000 --- a/aelys/tests/allocation_guard_tests.rs +++ /dev/null @@ -1,56 +0,0 @@ -use std::fs; -use std::path::Path; - -/// Ensure no runtime modules bypass VM allocation guards by calling Heap::alloc* directly. 
-#[test] -fn vm_modules_do_not_bypass_allocation_guards() { - let vm_dir = Path::new("../runtime/src/vm"); - let allowed = [ - "vm.rs", - "heap.rs", - "manual_heap.rs", - "config.rs", - "args.rs", - "mod.rs", - "alloc.rs", - ]; - - fn walk(dir: &Path, files: &mut Vec) { - for entry in fs::read_dir(dir).expect("dir exists") { - let entry = entry.expect("read entry"); - let path = entry.path(); - if path.is_dir() { - walk(&path, files); - } else { - files.push(path); - } - } - } - - let mut files = Vec::new(); - walk(vm_dir, &mut files); - - for file in files { - let name = file.file_name().and_then(|n| n.to_str()).unwrap_or(""); - if allowed.contains(&name) { - continue; - } - - let content = fs::read_to_string(&file).expect("read file"); - let mut bad = Vec::new(); - if content.contains("heap.alloc(") || content.contains("heap_mut().alloc") { - bad.push("heap.alloc"); - } - if content.contains("heap.alloc_function") || content.contains("heap.alloc_native") { - bad.push("heap.alloc_function/native"); - } - - if !bad.is_empty() { - panic!( - "direct heap allocation {:?} found in {}", - bad, - file.display() - ); - } - } -} diff --git a/aelys/tests/array_syntax_tests.rs b/aelys/tests/array_syntax_tests.rs new file mode 100644 index 0000000..9617a61 --- /dev/null +++ b/aelys/tests/array_syntax_tests.rs @@ -0,0 +1,472 @@ +//! not yet implemented yet: +//! 
- Stack array -> slice coercion + +use aelys_driver::compile_file_with_llvm; +use aelys_opt::OptimizationLevel; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::fs; +use tempfile::tempdir; + +fn compile_to_verified_ir(source: &str) -> String { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write(&source_path, source).expect("source should be written"); + compile_file_with_llvm(&source_path, OptimizationLevel::None, true) + .expect("llvm backend compilation should succeed"); + let ll_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + ir +} + +#[test] +fn array_literal_inferred() { + let ir = compile_to_verified_ir( + r#" +fn second() -> i64 { + let arr = [10, 20, 30] + return arr[1] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "should allocate [3 x i64] on the stack:\n{ir}" + ); + assert!( + ir.contains("store i64 10"), + "should store first element 10:\n{ir}" + ); + assert!( + ir.contains("store i64 20"), + "should store second element 20:\n{ir}" + ); + assert!( + ir.contains("store i64 30"), + "should store third element 30:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "should generate GEP for index access:\n{ir}" + ); + // No runtime array calls + assert!( + !ir.contains("__aelys_array_new"), + "should not call __aelys_array_new (stack-allocated):\n{ir}" + ); +} + +#[test] +fn array_literal_annotated() { + let ir = compile_to_verified_ir( + r#" +fn second() -> i64 { + let arr: [i64; 3] = [10, 20, 30] + return arr[1] +} +"#, + ); + assert!( + 
ir.contains("alloca [3 x i64]"), + "annotated array should allocate [3 x i64]:\n{ir}" + ); + assert!( + ir.contains("store i64 10"), + "should store first element 10:\n{ir}" + ); + assert!( + ir.contains("store i64 20"), + "should store second element 20:\n{ir}" + ); + assert!( + ir.contains("store i64 30"), + "should store third element 30:\n{ir}" + ); +} + +#[test] +fn array_annotation_f64() { + let ir = compile_to_verified_ir( + r#" +fn first() -> f64 { + let arr: [f64; 2] = [1.0, 2.0] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [2 x double]"), + "f64 annotated array should allocate [2 x double]:\n{ir}" + ); + assert!( + ir.contains("store double"), + "should store double values:\n{ir}" + ); +} + +#[test] +fn array_repeat_syntax() { + let ir = compile_to_verified_ir( + r#" +fn first_zero() -> i64 { + let zeros = [0; 100] + return zeros[0] +} +"#, + ); + assert!( + ir.contains("alloca [100 x i64]"), + "should allocate [100 x i64] on the stack:\n{ir}" + ); + let store_count = ir.matches("store i64 0").count(); + assert!( + store_count >= 100, + "[0; 100] should generate at least 100 stores, got {store_count}:\n{ir}" + ); +} + +#[test] +fn array_repeat_syntax_nonzero_fill() { + let ir = compile_to_verified_ir( + r#" +fn first_val() -> i64 { + let arr = [42; 5] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [5 x i64]"), + "should allocate [5 x i64] on the stack:\n{ir}" + ); + let store_count = ir.matches("store i64 42").count(); + assert!( + store_count >= 5, + "[42; 5] should store 42 into all 5 elements, got {store_count}:\n{ir}" + ); +} + +#[test] +fn array_repeat_syntax_small() { + let ir = compile_to_verified_ir( + r#" +fn val() -> i64 { + let arr = [7; 3] + return arr[2] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "should allocate [3 x i64]:\n{ir}" + ); + let store_count = ir.matches("store i64 7").count(); + assert!( + store_count >= 3, + "[7; 3] should have 3 stores of 7, got {store_count}:\n{ir}" + ); +} + 
+#[test] +fn array_f64() { + let ir = compile_to_verified_ir( + r#" +fn first_f() -> f64 { + let arr = [1.5, 2.7, 3.14] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [3 x double]"), + "f64 array should allocate [3 x double]:\n{ir}" + ); + assert!( + ir.contains("store double"), + "should store double values:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "should use GEP for index:\n{ir}" + ); +} + +#[test] +fn array_bool() { + let ir = compile_to_verified_ir( + r#" +fn first_b() -> bool { + let arr = [true, false, true] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i1]"), + "bool array should allocate [3 x i1]:\n{ir}" + ); + assert!(ir.contains("store i1"), "should store i1 values:\n{ir}"); +} + +#[test] +fn array_param_syntax() { + let ir = compile_to_verified_ir( + r#" +fn process(data: [i64; 3]) -> i64 { + return data[0] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "array param should allocate [3 x i64]:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "should generate GEP for index access:\n{ir}" + ); +} + +#[test] +fn array_return_compiles() { + let ir = compile_to_verified_ir( + r#" +fn make() -> [i64; 3] { + let arr = [1, 2, 3] + return arr +} +"#, + ); + assert!(ir.contains("ret"), "should emit a return: {ir}"); +} + +#[test] +#[should_panic(expected = "stack array too large")] +fn array_stack_overflow_repeat() { + compile_to_verified_ir( + r#" +fn huge() -> i64 { + let big = [0; 200000] + return big[0] +} +"#, + ); +} + +#[test] +#[should_panic(expected = "stack array too large")] +fn array_stack_overflow_let_optimization() { + // tests the stack size check in the Let-optimization path (stmts.rs) + compile_to_verified_ir( + r#" +fn huge_let() -> i64 { + let big = [0; 500000] + return big[0] +} +"#, + ); +} + +#[test] +fn array_index_read() { + let ir = compile_to_verified_ir( + r#" +fn third() -> i64 { + let arr = [100, 200, 300] + return arr[2] +} +"#, + ); + assert!( + 
ir.contains("alloca [3 x i64]"), + "should use stack allocation:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "index read should use GEP:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "should have bounds check panic:\n{ir}" + ); +} + +#[test] +fn array_index_write() { + let ir = compile_to_verified_ir( + r#" +fn mutate() -> i64 { + let mut arr = [10, 20, 30] + arr[1] = 99 + return arr[1] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "should use stack allocation:\n{ir}" + ); + assert!( + ir.contains("store i64 99"), + "index write should store 99:\n{ir}" + ); +} + +#[test] +fn array_foreach_known_length() { + let ir = compile_to_verified_ir( + r#" +fn sum_arr() -> i64 { + let arr = [1, 2, 3] + let mut total: i64 = 0 + for x in arr { + total = total + x + } + return total +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "array should be stack-allocated:\n{ir}" + ); + assert!( + !ir.contains("__aelys_len"), + "for-each on stack array should use known length, not __aelys_len:\n{ir}" + ); +} + +#[test] +fn array_let_optimization_literal() { + let ir = compile_to_verified_ir( + r#" +fn get() -> i64 { + let arr = [5, 10, 15] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "should allocate [3 x i64]:\n{ir}" + ); + assert!(ir.contains("store i64 5"), "should store 5:\n{ir}"); + assert!(ir.contains("store i64 10"), "should store 10:\n{ir}"); + assert!(ir.contains("store i64 15"), "should store 15:\n{ir}"); +} + +#[test] +fn array_let_optimization_repeat() { + let ir = compile_to_verified_ir( + r#" +fn get() -> i64 { + let arr = [99; 10] + return arr[5] +} +"#, + ); + assert!( + ir.contains("alloca [10 x i64]"), + "should allocate [10 x i64]:\n{ir}" + ); + let store_count = ir.matches("store i64 99").count(); + assert!( + store_count >= 10, + "[99; 10] should produce at least 10 stores, got {store_count}:\n{ir}" + ); +} + +#[test] +fn array_single_element() { + let ir = compile_to_verified_ir( + 
r#" +fn only() -> i64 { + let arr = [42] + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [1 x i64]"), + "single-element array should allocate [1 x i64]:\n{ir}" + ); + assert!(ir.contains("store i64 42"), "should store 42:\n{ir}"); +} + +#[test] +fn array_in_loop_body() { + let ir = compile_to_verified_ir( + r#" +fn use_in_loop() -> i64 { + let mut result: i64 = 0 + let mut i: i64 = 0 + while i < 3 { + let arr = [i, i, i] + result = result + arr[0] + i = i + 1 + } + return result +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "array in loop should still be stack-allocated:\n{ir}" + ); +} + +#[test] +fn array_with_expressions() { + let ir = compile_to_verified_ir( + r#" +fn expr_arr(a: i64) -> i64 { + let arr = [a + 1, a * 2, a - 3] + return arr[1] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "expression array should allocate [3 x i64]:\n{ir}" + ); + assert!( + ir.contains("add i64"), + "should generate add for a + 1:\n{ir}" + ); + assert!( + ir.contains("mul i64"), + "should generate mul for a * 2:\n{ir}" + ); + assert!( + ir.contains("sub i64"), + "should generate sub for a - 3:\n{ir}" + ); +} + +#[test] +fn array_struct_elements() { + let ir = compile_to_verified_ir( + r#" +struct Pair { x: i64, y: i64 } +fn make_pairs() -> i64 { + let a = Pair { x: 1, y: 2 } + let b = Pair { x: 3, y: 4 } + return a.x + b.y +} +"#, + ); + // just verify the struct-related code compiles; array-of-structs is a stretch goal, so this test validates the struct baseline. 
+ assert!( + ir.contains("getelementptr"), + "struct field access should generate GEP:\n{ir}" + ); +} diff --git a/aelys/tests/array_tests.rs b/aelys/tests/array_tests.rs deleted file mode 100644 index 0bf6e5a..0000000 --- a/aelys/tests/array_tests.rs +++ /dev/null @@ -1,1183 +0,0 @@ -mod common; - -use aelys_bytecode::{AelysArray, AelysVec, ArrayData, TypeTag, Value}; -use common::{ - assert_aelys_bool, assert_aelys_error_contains, assert_aelys_int, assert_aelys_str, run_aelys, - run_aelys_ok, -}; - -#[test] -fn test_array_new_ints() { - let arr = AelysArray::new_ints(5); - assert_eq!(arr.len(), 5); - assert!(!arr.is_empty()); - assert_eq!(arr.type_tag(), TypeTag::Int); - - // Zero-initialized - for i in 0..5 { - assert_eq!(arr.get(i), Some(Value::int(0))); - } -} - -#[test] -fn test_array_new_floats() { - let arr = AelysArray::new_floats(3); - assert_eq!(arr.len(), 3); - assert_eq!(arr.type_tag(), TypeTag::Float); - - for i in 0..3 { - assert_eq!(arr.get(i), Some(Value::float(0.0))); - } -} - -#[test] -fn test_array_new_bools() { - let arr = AelysArray::new_bools(4); - assert_eq!(arr.len(), 4); - assert_eq!(arr.type_tag(), TypeTag::Bool); - - for i in 0..4 { - assert_eq!(arr.get(i), Some(Value::bool(false))); - } -} - -#[test] -fn test_array_new_objects() { - let arr = AelysArray::new_objects(2); - assert_eq!(arr.len(), 2); - assert_eq!(arr.type_tag(), TypeTag::Object); - - for i in 0..2 { - assert!(arr.get(i).unwrap().is_null()); - } -} - -#[test] -fn test_array_from_ints() { - let arr = AelysArray::from_ints(vec![1, 2, 3, 4, 5]); - assert_eq!(arr.len(), 5); - assert_eq!(arr.type_tag(), TypeTag::Int); - - assert_eq!(arr.get(0), Some(Value::int(1))); - assert_eq!(arr.get(2), Some(Value::int(3))); - assert_eq!(arr.get(4), Some(Value::int(5))); - assert_eq!(arr.get(5), None); // Out of bounds -} - -#[test] -fn test_array_from_floats() { - let arr = AelysArray::from_floats(vec![1.5, 2.5, 3.5]); - assert_eq!(arr.len(), 3); - assert_eq!(arr.type_tag(), 
TypeTag::Float); - - assert_eq!(arr.get(0), Some(Value::float(1.5))); - assert_eq!(arr.get(1), Some(Value::float(2.5))); - assert_eq!(arr.get(2), Some(Value::float(3.5))); -} - -#[test] -fn test_array_from_bools() { - let arr = AelysArray::from_bools(vec![true, false, true]); - assert_eq!(arr.len(), 3); - assert_eq!(arr.type_tag(), TypeTag::Bool); - - assert_eq!(arr.get(0), Some(Value::bool(true))); - assert_eq!(arr.get(1), Some(Value::bool(false))); - assert_eq!(arr.get(2), Some(Value::bool(true))); -} - -#[test] -fn test_array_set() { - let mut arr = AelysArray::new_ints(3); - - assert!(arr.set(0, Value::int(10))); - assert!(arr.set(1, Value::int(20))); - assert!(arr.set(2, Value::int(30))); - - assert_eq!(arr.get(0), Some(Value::int(10))); - assert_eq!(arr.get(1), Some(Value::int(20))); - assert_eq!(arr.get(2), Some(Value::int(30))); - - // Out of bounds set returns false - assert!(!arr.set(3, Value::int(40))); - - // Wrong type set returns false (ints array, trying to set float) - assert!(!arr.set(0, Value::float(1.5))); -} - -#[test] -fn test_array_empty() { - let arr = AelysArray::new_ints(0); - assert_eq!(arr.len(), 0); - assert!(arr.is_empty()); - assert_eq!(arr.get(0), None); -} - -#[test] -fn test_array_size_bytes() { - let arr_ints = AelysArray::from_ints(vec![1, 2, 3, 4]); - let arr_bools = AelysArray::from_bools(vec![true, false, true, false]); - - // Ints use 8 bytes/element, bools use 1 byte/element - // Size includes struct overhead - assert!(arr_ints.size_bytes() > arr_bools.size_bytes()); -} - -#[test] -fn test_vec_new_empty() { - let v = AelysVec::new_ints(); - assert_eq!(v.len(), 0); - assert!(v.is_empty()); - assert_eq!(v.type_tag(), TypeTag::Int); -} - -#[test] -fn test_vec_push_pop() { - let mut v = AelysVec::new_ints(); - - assert!(v.push(Value::int(1))); - assert!(v.push(Value::int(2))); - assert!(v.push(Value::int(3))); - - assert_eq!(v.len(), 3); - assert!(!v.is_empty()); - - assert_eq!(v.pop(), Some(Value::int(3))); - assert_eq!(v.pop(), 
Some(Value::int(2))); - assert_eq!(v.pop(), Some(Value::int(1))); - assert_eq!(v.pop(), None); - - assert!(v.is_empty()); -} - -#[test] -fn test_vec_push_wrong_type() { - let mut v = AelysVec::new_ints(); - - // Push wrong type returns false - assert!(!v.push(Value::float(1.5))); - assert!(!v.push(Value::bool(true))); - - // Vec remains unchanged - assert!(v.is_empty()); -} - -#[test] -fn test_vec_get_set() { - let mut v = AelysVec::from_ints(vec![10, 20, 30]); - - assert_eq!(v.get(0), Some(Value::int(10))); - assert_eq!(v.get(1), Some(Value::int(20))); - assert_eq!(v.get(2), Some(Value::int(30))); - assert_eq!(v.get(3), None); - - assert!(v.set(1, Value::int(25))); - assert_eq!(v.get(1), Some(Value::int(25))); - - // Out of bounds set - assert!(!v.set(5, Value::int(50))); -} - -#[test] -fn test_vec_reserve() { - let mut v = AelysVec::new_floats(); - assert_eq!(v.capacity(), 0); - - v.reserve(10); - assert!(v.capacity() >= 10); - assert!(v.is_empty()); // Reserve doesn't add elements -} - -#[test] -fn test_vec_clear() { - let mut v = AelysVec::from_bools(vec![true, false, true]); - assert_eq!(v.len(), 3); - - v.clear(); - assert!(v.is_empty()); - assert_eq!(v.len(), 0); -} - -#[test] -fn test_vec_shrink_to_fit() { - let mut v = AelysVec::new_ints(); - v.reserve(100); - assert!(v.capacity() >= 100); - - v.push(Value::int(1)); - v.shrink_to_fit(); - // Capacity should be reduced (implementation-dependent exact value) - assert!(v.capacity() >= 1); -} - -#[test] -fn test_vec_to_array() { - let v = AelysVec::from_ints(vec![1, 2, 3]); - let arr = v.to_array(); - - assert_eq!(arr.len(), 3); - assert_eq!(arr.type_tag(), TypeTag::Int); - assert_eq!(arr.get(0), Some(Value::int(1))); - assert_eq!(arr.get(1), Some(Value::int(2))); - assert_eq!(arr.get(2), Some(Value::int(3))); -} - -#[test] -fn test_vec_with_capacity() { - let v = AelysVec::with_capacity_ints(10); - assert!(v.is_empty()); - assert!(v.capacity() >= 10); -} - -#[test] -fn test_type_tag_from_u8() { - 
assert_eq!(TypeTag::from_u8(0), Some(TypeTag::Int)); - assert_eq!(TypeTag::from_u8(1), Some(TypeTag::Float)); - assert_eq!(TypeTag::from_u8(2), Some(TypeTag::Bool)); - assert_eq!(TypeTag::from_u8(3), Some(TypeTag::Object)); - assert_eq!(TypeTag::from_u8(4), None); - assert_eq!(TypeTag::from_u8(255), None); -} - -#[test] -fn test_array_data_accessors() { - let data_ints = ArrayData::Ints(vec![1, 2, 3].into_boxed_slice()); - assert!(data_ints.as_ints().is_some()); - assert!(data_ints.as_floats().is_none()); - assert!(data_ints.as_bools().is_none()); - assert!(data_ints.as_objects().is_none()); - - let data_floats = ArrayData::Floats(vec![1.0, 2.0].into_boxed_slice()); - assert!(data_floats.as_floats().is_some()); - assert!(data_floats.as_ints().is_none()); - - let data_bools = ArrayData::Bools(vec![1, 0, 1].into_boxed_slice()); - assert!(data_bools.as_bools().is_some()); - assert!(data_bools.as_floats().is_none()); - - let data_objects = ArrayData::Objects(vec![Value::null()].into_boxed_slice()); - assert!(data_objects.as_objects().is_some()); - assert!(data_objects.as_bools().is_none()); -} - -#[test] -fn test_array_data_mutable_accessors() { - let mut data = ArrayData::Ints(vec![1, 2, 3].into_boxed_slice()); - - if let Some(ints) = data.as_ints_mut() { - ints[0] = 100; - } - - assert_eq!(data.as_ints().unwrap()[0], 100); -} - -#[test] -fn test_e2e_empty_array_literal() { - let result = run_aelys_ok("let arr = []; 0"); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_e2e_int_array_literal() { - // Simple array with ints - assert_aelys_int("let arr = [1, 2, 3]; arr[0]", 1); - assert_aelys_int("let arr = [1, 2, 3]; arr[1]", 2); - assert_aelys_int("let arr = [1, 2, 3]; arr[2]", 3); -} - -#[test] -fn test_e2e_array_length() { - assert_aelys_int("let arr = [1, 2, 3, 4, 5]; arr.len()", 5); - assert_aelys_int("let arr = []; arr.len()", 0); -} - -#[test] -fn test_e2e_array_index_expression() { - // Index with computed expression - assert_aelys_int("let arr = 
[10, 20, 30]; let i = 1; arr[i]", 20); - assert_aelys_int("let arr = [10, 20, 30]; arr[1 + 1]", 30); -} - -#[test] -fn test_e2e_array_store() { - assert_aelys_int("let arr = [1, 2, 3]; arr[0] = 100; arr[0]", 100); - assert_aelys_int("let arr = [1, 2, 3]; arr[1] = 200; arr[1]", 200); -} - -#[test] -fn test_e2e_float_array() { - let result = run_aelys("let arr = [1.5, 2.5, 3.5]; arr[1]"); - assert_eq!(result.as_float(), Some(2.5)); -} - -#[test] -fn test_e2e_bool_array() { - assert_aelys_bool("let arr = [true, false, true]; arr[0]", true); - assert_aelys_bool("let arr = [true, false, true]; arr[1]", false); -} - -#[test] -fn test_e2e_nested_array_access() { - // Array in function - assert_aelys_int( - r#" - fn get_second(arr) { - arr[1] - } - let a = [10, 20, 30]; - get_second(a) - "#, - 20, - ); -} - -#[test] -fn test_e2e_array_in_loop() { - assert_aelys_int( - r#" - let arr = [1, 2, 3, 4, 5]; - let mut sum = 0; - let mut i = 0; - while i < arr.len() { - sum += arr[i]; - i++; - } - sum - "#, - 15, // 1+2+3+4+5 - ); -} - -#[test] -fn test_e2e_array_trailing_comma() { - // Trailing comma is allowed - assert_aelys_int("let arr = [1, 2, 3,]; arr[2]", 3); -} - -#[test] -fn test_e2e_empty_vec_literal() { - let result = run_aelys_ok("let v = Vec[]; 0"); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_e2e_vec_literal() { - assert_aelys_int("let v = Vec[1, 2, 3]; v[0]", 1); - assert_aelys_int("let v = Vec[1, 2, 3]; v[2]", 3); -} - -#[test] -fn test_e2e_vec_length() { - assert_aelys_int("let v = Vec[1, 2, 3, 4]; v.len()", 4); -} - -#[test] -fn test_e2e_vec_push() { - assert_aelys_int( - r#" - let v = Vec[1, 2]; - v.push(3); - v[2] - "#, - 3, - ); -} - -#[test] -fn test_e2e_vec_pop() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3]; - v.pop() - "#, - 3, - ); -} - -#[test] -fn test_e2e_vec_push_pop_sequence() { - assert_aelys_int( - r#" - let v = Vec[]; - v.push(10); - v.push(20); - v.push(30); - let a = v.pop(); - let b = v.pop(); - a + b - "#, - 50, // 30 + 
20 - ); -} - -#[test] -fn test_e2e_vec_store() { - assert_aelys_int("let v = Vec[1, 2, 3]; v[1] = 100; v[1]", 100); -} - -#[test] -fn test_e2e_typed_array_int() { - assert_aelys_int("let arr = Array[1, 2, 3]; arr[1]", 2); -} - -#[test] -fn test_e2e_typed_array_float() { - let result = run_aelys("let arr = Array[1.0, 2.0, 3.0]; arr[2]"); - assert_eq!(result.as_float(), Some(3.0)); -} - -#[test] -fn test_e2e_typed_array_bool() { - assert_aelys_bool("let arr = Array[true, false]; arr[1]", false); -} - -#[test] -fn test_e2e_typed_vec_int() { - assert_aelys_int("let v = Vec[5, 10, 15]; v[0]", 5); -} - -#[test] -fn test_e2e_array_oob_read() { - assert_aelys_error_contains("let arr = [1, 2, 3]; arr[10]", "out of bounds"); -} - -#[test] -fn test_e2e_array_oob_write() { - assert_aelys_error_contains("let arr = [1, 2, 3]; arr[5] = 10", "out of bounds"); -} - -#[test] -fn test_e2e_array_negative_index() { - assert_aelys_error_contains("let arr = [1, 2, 3]; arr[-1]", ""); -} - -#[test] -fn test_e2e_vec_oob_read() { - assert_aelys_error_contains("let v = Vec[1, 2]; v[5]", "out of bounds"); -} - -#[test] -fn test_e2e_array_sum_loop() { - assert_aelys_int( - r#" - let arr = [10, 20, 30, 40, 50]; - let mut sum = 0; - let mut i = 0; - while i < arr.len() { - sum += arr[i]; - i++; - } - sum - "#, - 150, - ); -} - -#[test] -fn test_e2e_array_modify_in_loop() { - assert_aelys_int( - r#" - let arr = [1, 2, 3, 4, 5]; - let mut i = 0; - while i < arr.len() { - arr[i] *= 2; - i++; - } - arr[0] + arr[1] + arr[2] + arr[3] + arr[4] - "#, - 30, // 2+4+6+8+10 - ); -} - -#[test] -fn test_e2e_array_find_max() { - assert_aelys_int( - r#" - let arr = [5, 2, 9, 1, 7]; - let mut max = arr[0]; - let mut i = 1; - while i < arr.len() { - if arr[i] > max { - max = arr[i]; - } - i++; - } - max - "#, - 9, - ); -} - -#[test] -fn test_e2e_array_swap() { - assert_aelys_int( - r#" - let arr = [10, 20]; - let tmp = arr[0]; - arr[0] = arr[1]; - arr[1] = tmp; - arr[0] * 10 + arr[1] - "#, - 210, // 20*10 + 10 - 
); -} - -#[test] -fn test_e2e_array_passed_to_function() { - assert_aelys_int( - r#" - fn sum_arr(a) -> int { - let mut s = 0; - let mut i = 0; - while i < a.len() { - s += a[i]; - i++; - } - return s - } - let arr = [1, 2, 3, 4]; - sum_arr(arr) - "#, - 10, - ); -} - -#[test] -fn test_e2e_array_returned_from_function() { - assert_aelys_int( - r#" - fn make_arr() { - return [100, 200, 300] - } - let a = make_arr(); - a[1] - "#, - 200, - ); -} - -#[test] -fn test_e2e_array_nested_access() { - assert_aelys_int( - r#" - let a = [1, 2, 3]; - let b = [10, 20, 30]; - a[0] + b[a[1]] - "#, - 31, // 1 + b[2] = 1 + 30 - ); -} - -#[test] -fn test_e2e_vec_build_and_sum() { - assert_aelys_int( - r#" - let v = Vec[]; - v.push(1); - v.push(2); - v.push(3); - v.push(4); - v.push(5); - let mut sum = 0; - let mut i = 0; - while i < v.len() { - sum += v[i]; - i++; - } - sum - "#, - 15, - ); -} - -#[test] -fn test_e2e_vec_pop_all() { - assert_aelys_int( - r#" - let v = Vec[10, 20, 30]; - let a = v.pop(); - let b = v.pop(); - let c = v.pop(); - a + b + c - "#, - 60, - ); -} - -#[test] -fn test_e2e_vec_modify_elements() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3]; - v[0] = 100; - v[1] = 200; - v[2] = 300; - v[0] + v[1] + v[2] - "#, - 600, - ); -} - -#[test] -fn test_e2e_vec_capacity_after_reserve() { - assert_aelys_bool( - r#" - let v = Vec[]; - v.reserve(50); - v.capacity() >= 50 - "#, - true, - ); -} - -#[test] -fn test_e2e_vec_grow_dynamically() { - assert_aelys_int( - r#" - let v = Vec[]; - let mut i = 0; - while i < 100 { - v.push(i); - i++; - } - v.len() - "#, - 100, - ); -} - -#[test] -fn test_e2e_vec_passed_to_function() { - assert_aelys_int( - r#" - fn double_all(v) { - let mut i = 0; - while i < v.len() { - v[i] *= 2; - i++; - } - } - let v = Vec[1, 2, 3]; - double_all(v); - v[0] + v[1] + v[2] - "#, - 12, // 2+4+6 - ); -} - -#[test] -fn test_e2e_vec_stack_operations() { - assert_aelys_int( - r#" - let stack = Vec[]; - stack.push(1); - stack.push(2); - stack.push(3); - 
let a = stack.pop(); - stack.push(4); - let b = stack.pop(); - let c = stack.pop(); - a * 100 + b * 10 + c - "#, - 342, // 3*100 + 4*10 + 2 - ); -} - -#[test] -fn test_e2e_float_array_sum() { - let result = run_aelys( - r#" - let arr = [1.5, 2.5, 3.0]; - let mut sum = 0.0; - let mut i = 0; - while i < arr.len() { - sum += arr[i]; - i++; - } - sum - "#, - ); - assert_eq!(result.as_float(), Some(7.0)); -} - -#[test] -fn test_e2e_float_vec_push_pop() { - let result = run_aelys( - r#" - let v = Vec[1.0, 2.0]; - v.push(3.5); - v.pop() - "#, - ); - assert_eq!(result.as_float(), Some(3.5)); -} - -#[test] -fn test_e2e_bool_array_all_true() { - assert_aelys_bool( - r#" - let arr = [true, true, true]; - let mut all = true; - let mut i = 0; - while i < arr.len() { - if arr[i] == false { all = false } - i++; - } - all - "#, - true, - ); -} - -#[test] -fn test_e2e_bool_array_any_true() { - assert_aelys_bool( - r#" - let arr = [false, true, false]; - let mut any = false; - let mut i = 0; - while i < arr.len() { - if arr[i] { any = true } - i++; - } - any - "#, - true, - ); -} - -#[test] -fn test_e2e_bool_vec_push_pop() { - assert_aelys_bool( - r#" - let v = Vec[false, false]; - v.push(true); - v.pop() - "#, - true, - ); -} - -#[test] -fn test_e2e_single_element_array() { - assert_aelys_int("let arr = [42]; arr[0]", 42); - assert_aelys_int("let arr = [42]; arr.len()", 1); -} - -#[test] -fn test_e2e_single_element_vec() { - assert_aelys_int("let v = Vec[99]; v[0]", 99); - assert_aelys_int("let v = Vec[99]; v.len()", 1); -} - -#[test] -fn test_e2e_empty_vec_len() { - assert_aelys_int("let v = Vec[]; v.len()", 0); -} - -#[test] -fn test_e2e_empty_array_len() { - assert_aelys_int("let arr = []; arr.len()", 0); -} - -#[test] -fn test_e2e_vec_pop_returns_correct_type() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3]; - let x = v.pop(); - x * 10 - "#, - 30, - ); -} - -#[test] -fn test_e2e_array_len_in_expression() { - assert_aelys_int("let arr = [1, 2, 3, 4, 5]; arr.len() * 2", 10); 
-} - -#[test] -fn test_e2e_vec_len_in_condition() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3]; - if v.len() > 2 { 100 } else { 0 } - "#, - 100, - ); -} - -// Regression tests: empty Vec[] / Array[] must work with non-int types - -#[test] -fn test_e2e_vec_push_string() { - assert_aelys_str( - r#" - let v = Vec[] - v.push("hello") - v.push("world") - v[0] - "#, - "hello", - ); -} - -#[test] -fn test_e2e_vec_push_string_in_typed_fn() { - assert_aelys_int( - r#" - fn build(n: int) -> int { - let buffer = Vec[] - for i in 0..n { - buffer.push("-") - } - return buffer.len() - } - build(5) - "#, - 5, - ); -} - -#[test] -fn test_e2e_vec_push_float() { - let result = run_aelys( - r#" - let v = Vec[] - v.push(1.5) - v.push(2.5) - v[0] - "#, - ); - assert_eq!(result.as_float(), Some(1.5)); -} - -#[test] -fn test_e2e_vec_push_bool() { - assert_aelys_bool( - r#" - let v = Vec[] - v.push(true) - v.push(false) - v[0] - "#, - true, - ); -} - -#[test] -fn test_e2e_vec_push_then_modify() { - assert_aelys_str( - r#" - let v = Vec[] - v.push("a") - v.push("b") - v[1] = "z" - v[1] - "#, - "z", - ); -} - -#[test] -fn test_e2e_vec_push_in_untyped_fn() { - assert_aelys_int( - r#" - fn build_vec() { - let v = Vec[] - v.push(10) - v.push(20) - v.push(30) - return v.len() - } - build_vec() - "#, - 3, - ); -} - -// Multidimensional array tests - -#[test] -fn test_e2e_2d_array_basic() { - // Create a 2D array (array of arrays) - assert_aelys_int( - r#" - let matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]; - matrix[0][0] - "#, - 1, - ); -} - -#[test] -fn test_e2e_2d_array_access() { - // Access different elements - assert_aelys_int("let m = [[1, 2], [3, 4]]; m[0][1]", 2); - assert_aelys_int("let m = [[1, 2], [3, 4]]; m[1][0]", 3); - assert_aelys_int("let m = [[1, 2], [3, 4]]; m[1][1]", 4); -} - -#[test] -fn test_e2e_2d_array_write() { - // Modify elements in 2D array - assert_aelys_int( - r#" - let m = [[1, 2], [3, 4]]; - m[0][1] = 99; - m[0][1] - "#, - 99, - ); -} - -#[test] -fn 
test_e2e_2d_array_row_access() { - // Access a row (which is itself an array) - assert_aelys_int( - r#" - let matrix = [[10, 20, 30], [40, 50, 60]]; - let row = matrix[1]; - row[2] - "#, - 60, - ); -} - -#[test] -fn test_e2e_2d_array_sum() { - // Sum all elements in a 2x3 matrix - assert_aelys_int( - r#" - let m = [[1, 2, 3], [4, 5, 6]]; - let mut sum = 0; - let mut i = 0; - while i < 2 { - let mut j = 0; - while j < 3 { - sum += m[i][j]; - j++; - } - i++; - } - sum - "#, - 21, // 1+2+3+4+5+6 - ); -} - -#[test] -fn test_e2e_2d_vec_basic() { - // Vec of vecs - assert_aelys_int( - r#" - let grid = Vec[Vec[1, 2], Vec[3, 4]]; - grid[0][1] - "#, - 2, - ); -} - -#[test] -fn test_e2e_2d_vec_push() { - // Push to inner vec - assert_aelys_int( - r#" - let grid = Vec[Vec[1, 2], Vec[3, 4]]; - grid[0].push(99); - grid[0][2] - "#, - 99, - ); -} - -#[test] -fn test_e2e_2d_vec_modify() { - // Modify elements in vec of vecs - assert_aelys_int( - r#" - let grid = Vec[Vec[1, 2], Vec[3, 4]]; - grid[1][0] = 100; - grid[1][0] - "#, - 100, - ); -} - -#[test] -fn test_e2e_3d_array() { - // 3D array access - assert_aelys_int( - r#" - let cube = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]; - cube[1][0][1] - "#, - 6, // cube[1][0][1] = 6 - ); -} - -#[test] -fn test_e2e_3d_array_write() { - // Modify 3D array element - assert_aelys_int( - r#" - let cube = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]; - cube[0][1][0] = 999; - cube[0][1][0] - "#, - 999, - ); -} - -#[test] -fn test_e2e_2d_array_in_function() { - // Pass 2D array to function - assert_aelys_int( - r#" - fn get_element(matrix, row, col) { - return matrix[row][col] - } - let m = [[10, 20, 30], [40, 50, 60]]; - get_element(m, 1, 2) - "#, - 60, - ); -} - -#[test] -fn test_e2e_2d_array_transpose() { - // Simple 2x2 matrix transpose - assert_aelys_int( - r#" - let m = [[1, 2], [3, 4]]; - let t = [[0, 0], [0, 0]]; - let mut i = 0; - while i < 2 { - let mut j = 0; - while j < 2 { - t[j][i] = m[i][j]; - j++; - } - i++; - } - t[0][1] + t[1][0] - "#, - 5, 
// t[0][1]=3, t[1][0]=2, sum=5 - ); -} - -#[test] -fn test_e2e_mixed_array_vec() { - // Array of vecs - assert_aelys_int( - r#" - let data = [Vec[1, 2], Vec[3, 4, 5]]; - data[1][2] - "#, - 5, - ); -} - -#[test] -fn test_e2e_vec_of_arrays() { - // Vec containing arrays - assert_aelys_int( - r#" - let data = Vec[[1, 2], [3, 4]]; - data[0][1] - "#, - 2, - ); -} - -#[test] -fn test_e2e_2d_array_float() { - // 2D float array - let result = run_aelys( - r#" - let m = [[1.0, 2.0], [3.0, 4.0]]; - m[0][1] + m[1][1] - "#, - ); - assert_eq!(result.as_float(), Some(6.0)); -} - -#[test] -fn test_e2e_2d_array_bool() { - // 2D boolean array - assert_aelys_bool( - r#" - let grid = [[true, false], [false, true]]; - grid[0][0] - "#, - true, - ); -} - -#[test] -fn test_e2e_2d_array_len() { - // Get dimensions of 2D array - assert_aelys_int( - r#" - let m = [[1, 2, 3], [4, 5, 6]]; - let rows = m.len(); - let cols = m[0].len(); - rows * 10 + cols - "#, - 23, // 2 rows, 3 cols -> 23 - ); -} - -#[test] -fn test_e2e_2d_array_computed_index() { - // Use computed indices - assert_aelys_int( - r#" - let m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]; - let i = 1; - let j = 2; - m[i][j] - "#, - 6, - ); -} - -#[test] -fn test_e2e_2d_array_nested_computed() { - // Use array value as index for another array - assert_aelys_int( - r#" - let indices = [[0, 1], [1, 0]]; - let data = [[10, 20], [30, 40]]; - let row = indices[0][1]; - let col = indices[1][0]; - data[row][col] - "#, - 40, // indices[0][1]=1, indices[1][0]=1, so data[1][1]=40 - ); -} - -#[test] -fn test_e2e_2d_array_find_max() { - // Find max element in 2D array - assert_aelys_int( - r#" - let m = [[5, 2, 8], [1, 9, 3], [4, 6, 7]]; - let mut max = m[0][0]; - let mut i = 0; - while i < 3 { - let mut j = 0; - while j < 3 { - if m[i][j] > max { - max = m[i][j]; - } - j++; - } - i++; - } - max - "#, - 9, - ); -} diff --git a/aelys/tests/audit_regression_tests.rs b/aelys/tests/audit_regression_tests.rs new file mode 100644 index 0000000..9b6cb53 --- 
/dev/null +++ b/aelys/tests/audit_regression_tests.rs @@ -0,0 +1,1521 @@ +use aelys_air::lower::lower; +use aelys_air::passes::validate::validate_air; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn lower_source(code: &str) -> aelys_air::AirProgram { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let typed = TypeInference::infer_program(stmts, src).expect("sema failed"); + lower(&typed) +} + +fn air_pipeline_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let typed = match TypeInference::infer_program(stmts, src) { + Ok(t) => t, + Err(_) => return false, + }; + let air = lower(&typed); + validate_air(&air).is_ok() +} + +fn sema_error_count(code: &str) -> usize { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => 0, + Err(errors) => errors.len(), + } +} + +#[test] +fn a5_bug001_undefined_assign_no_cascade() { + let count = sema_error_count( + r#" +fn test() { + undefined_var = 42 + let x = undefined_var + let y = undefined_var +} +"#, + ); + assert!( + count <= 2, + ": undefined_var used 3 times should produce at most 2 errors, got {}", + count + ); +} + +#[test] +fn 
a5_bug001_single_undefined_single_error() { + let count = sema_error_count( + r#" +fn test() { + bad = 1 +} +"#, + ); + assert_eq!( + count, 1, + ": single undefined assignment should produce exactly 1 error" + ); +} + +#[test] +fn a5_bug002_undefined_read_no_cascade() { + let count = sema_error_count( + r#" +fn test() { + let x = ghost + 1 + let y = ghost * 2 + let z = ghost +} +"#, + ); + assert!( + count <= 2, + "A5-BUG-002: ghost used 3 times should produce at most 2 errors, got {}", + count + ); +} + +#[test] +fn a3_bug001_narrowing_through_variable_i8() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = 100 + return x +} +"# + ), + ": let x = 100; return x in i8 fn should pass sema" + ); +} + +#[test] +fn a3_bug001_narrowing_through_variable_i32() { + assert!( + sema_ok( + r#" +fn f() -> i32 { + let x = 100 + return x +} +"# + ), + ": let x = 100; return x in i32 fn should pass sema" + ); +} + +#[test] +fn a3_bug001_narrowing_chain() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = 42 + let y = x + return y +} +"# + ), + ": let y = x where x = 42, return y in i8 fn should pass sema" + ); +} + +#[test] +fn a3_bug001_overflow_through_variable() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = 200 + return x +} +"# + ), + ": let x = 200; return x in i8 fn should FAIL (overflow)" + ); +} + +#[test] +fn a3_bug001_mutable_not_tracked() { + // mutable variables should not be tracked for narrowing because they can be reassigned. this test ensures we don't incorrectly narrow through a mutable variable. 
+ assert!( + sema_ok( + r#" +fn f() -> i64 { + let mut x = 100 + x = 999999999 + return x +} +"# + ), + ": mutable variable should not be narrowed (could be reassigned)" + ); +} + +#[test] +fn a3_bug001_negative_literal_through_variable() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = -1 + return x +} +"# + ), + ": let x = -1; return x in i8 fn should pass sema" + ); +} + +#[test] +fn if_else_narrowing_i32() { + assert!( + sema_ok( + r#" +fn f() -> i32 { + let x = if true { 42 } else { 100 } + return x +} +"# + ), + "if-else with i32-fitting literals assigned to var should pass" + ); +} + +#[test] +fn if_else_both_literals_i8() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = if true { 10 } else { 20 } + return x +} +"# + ), + "if-else with i8-fitting literals should pass" + ); +} + +#[test] +fn if_else_direct_return_narrowing() { + assert!( + sema_ok( + r#" +fn f() -> i32 { + return if true { 1 } else { 2 } +} +"# + ), + "direct return of if-else with literals should narrow to i32" + ); +} + +#[test] +fn if_else_narrowing_through_variable() { + assert!( + sema_ok( + r#" +fn f() -> i16 { + let x = if true { 300 } else { 500 } + return x +} +"# + ), + "if-else literals tracked and narrowed through variable for i16" + ); +} + +#[test] +fn if_else_overflow_detected() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = if true { 10 } else { 200 } + return x +} +"# + ), + "if-else with one branch overflowing i8 should fail" + ); +} + +#[test] +fn if_else_same_concrete_type_no_fresh_var() { + assert!( + sema_ok( + r#" +fn f() -> i64 { + let x = if true { 42 } else { 100 } + return x +} +"# + ), + "if-else with same concrete type should not create a fresh Var" + ); +} + +#[test] +fn if_else_different_types_rejected() { + assert!( + !sema_ok( + r#" +fn f() -> i64 { + let x = if true { 42 } else { "hello" } + return x +} +"# + ), + "if-else with different types should be rejected" + ); +} + +#[test] +fn if_without_else_not_affected() { + assert!( + 
sema_ok( + r#" +fn f() -> i64 { + let x = 10 + if x > 5 { + return 1 + } + return 0 +} +"# + ), + "if without else should still work" + ); +} + +#[test] +fn normal_code_compiles_through_air_pipeline() { + assert!( + air_pipeline_ok( + r#" +fn add(a: i64, b: i64) -> i64 { + return a + b +} + +fn main() -> i64 { + let x = 42 + let y = add(x, 10) + return y +} +"# + ), + "basic function calls should pass AIR validation" + ); +} + +#[test] +fn function_with_if_else_compiles_through_air() { + assert!( + air_pipeline_ok( + r#" +fn f(x: i64) -> i64 { + let result = if x > 0 { x } else { 0 } + return result +} +"# + ), + "if-else with matching types should pass AIR validation" + ); +} + +#[test] +fn multi_function_pipeline_no_var_leak() { + assert!( + air_pipeline_ok( + r#" +fn double(n: i64) -> i64 { + return n * 2 +} + +fn is_positive(n: i64) -> bool { + return n > 0 +} + +fn main() -> i64 { + let a = 5 + let b = double(a) + let flag = is_positive(b) + if flag { + return b + } + return 0 +} +"# + ), + "multi-function code should not leak type variables into AIR" + ); +} + +#[test] +fn oneof_no_leak_on_binary_op() { + assert!( + sema_ok( + r#" +fn f(a: i64, b: i64) -> i64 { + let c = a + b + let d = c * 2 + let e = d - a + return e +} +"# + ), + "chained binary ops should not corrupt type bindings" + ); +} + +#[test] +fn oneof_no_corruption_on_type_mismatch() { + let count = sema_error_count( + r#" +fn f(x: string) -> i64 { + return x + 1 +} +"#, + ); + assert!( + count >= 1, + "string + int should produce at least 1 error, got {}", + count + ); +} + +#[test] +fn oneof_successful_match_preserves_bindings() { + assert!( + sema_ok( + r#" +fn f(a: i32, b: i32) -> i32 { + let x = a + b + let y = x * a + return y - b +} +"# + ), + "binary ops on i32 should resolve correctly through OneOf" + ); +} + +#[test] +fn oneof_multiple_ops_same_function() { + assert!( + air_pipeline_ok( + r#" +fn compute(a: i64, b: i64) -> i64 { + let sum = a + b + let diff = a - b + let prod = sum * 
diff + return prod +} +"# + ), + "multiple binary ops in one function should not leak vars to AIR" + ); +} + +#[test] +fn oneof_failed_match_no_pollution() { + let count = sema_error_count( + r#" +fn f(x: bool) -> bool { + return x + true +} +"#, + ); + assert!( + count >= 1, + "bool + bool should produce at least 1 error, got {}", + count + ); +} + +#[test] +fn generic_function_called_with_different_types() { + assert!( + sema_ok( + r#" +fn identity(x: T) -> T { return x } + +fn main() { + let a = identity(42) + let b = identity("hello") +} +"# + ), + "generic function called with i64 and string should both pass" + ); +} + +#[test] +fn generic_function_preserves_return_type_per_call() { + assert!( + air_pipeline_ok( + r#" +fn identity(x: T) -> T { return x } + +fn main() -> i64 { + let a = identity(42) + return a +} +"# + ), + "generic function return type should resolve correctly through AIR" + ); +} + +#[test] +fn unannotated_function_infers_single_type() { + assert!( + sema_ok( + r#" +fn double(x) { return x * 2 } + +fn main() -> i64 { + return double(21) +} +"# + ), + "unannotated function called with one type should infer correctly" + ); +} + +#[test] +fn closure_captures_resolved_after_substitution() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + let x: i64 = 100 + let closure = fn() -> i64 { return x } + return closure() +} +"# + ), + "closure capturing a typed variable should resolve through pipeline" + ); +} + +#[test] +fn closure_capture_with_inferred_type() { + assert!( + air_pipeline_ok( + r#" +fn main() -> i64 { + let x = 42 + let f = fn() -> i64 { return x } + return f() +} +"# + ), + "closure capturing inferred variable should pass AIR validation" + ); +} + +#[test] +fn multiple_functions_independent_inference() { + assert!( + sema_ok( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } +fn greet(name: string) -> string { return name } + +fn main() { + let x = add(1, 2) + let y = greet("hello") +} +"# + ), + "independent functions with 
different types should not interfere" + ); +} + +#[test] +fn nested_function_types_resolved() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + fn inner(x: i64) -> i64 { return x + 1 } + return inner(41) +} +"# + ), + "nested function should have types resolved correctly" + ); +} + +#[test] +fn equal_constraint_failure_does_not_corrupt_subsequent_inference() { + assert!( + sema_ok( + r#" +fn f(a: i64, b: i64) -> i64 { + let x = a + b + return x +} +"# + ), + "valid code after constraint solving should pass" + ); +} + +#[test] +fn mixed_type_errors_do_not_cascade_through_solver() { + let count = sema_error_count( + r#" +fn f(a: i64, b: string) -> i64 { + let x = a + b + let y = a + 1 + return y +} +"#, + ); + assert!( + count >= 1 && count <= 3, + "type mismatch in one expr should not cascade unboundedly, got {}", + count + ); +} + +#[test] +fn function_type_mismatch_on_return_does_not_corrupt_solver() { + let count = sema_error_count( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { + return f(x) +} + +fn main() -> i64 { + let g = fn(n: i64) -> i64 { return n + 1 } + return apply(g, 10) +} +"#, + ); + assert_eq!( + count, 0, + "valid higher-order function should produce no errors" + ); +} + +#[test] +fn function_unification_partial_param_match_rolled_back() { + let count = sema_error_count( + r#" +fn f() -> i64 { + let x: i64 = 10 + let y: string = "hello" + return x + 1 +} +"#, + ); + assert_eq!( + count, 0, + "unrelated variables should not interfere with each other" + ); +} + +#[test] +fn compound_type_unification_failure_isolated() { + let count = sema_error_count( + r#" +fn f(a: i64, b: i64) -> i64 { + let sum = a + b + let prod = a * b + return sum + prod +} +"#, + ); + assert_eq!( + count, 0, + "compound expressions should unify cleanly without partial corruption" + ); +} + +#[test] +fn solver_rollback_preserves_valid_bindings_after_error() { + let count = sema_error_count( + r#" +fn f() -> i64 { + let a: i64 = 10 + let b: i64 = 20 + let c = a + b + 
return c +} +"#, + ); + assert_eq!(count, 0, "valid bindings should survive constraint solving"); +} + +#[test] +fn closure_capture_inferred_var_resolved_through_pipeline() { + assert!( + air_pipeline_ok( + r#" +fn outer() { + let x = 100 + let closure = fn() { return x } + return x +} +"# + ), + "closure capturing Var(N) should resolve after substitution + finalize" + ); +} + +#[test] +fn closure_capture_multiple_inferred_vars() { + assert!( + air_pipeline_ok( + r#" +fn outer() -> i64 { + let a = 10 + let b = 20 + let closure = fn() -> i64 { return a + b } + return closure() +} +"# + ), + "closure capturing multiple inferred variables should pass AIR" + ); +} + +#[test] +fn nested_closure_captures_propagate() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + let x: i64 = 42 + let f = fn() -> i64 { + let g = fn() -> i64 { return x } + return g() + } + return f() +} +"# + ), + "nested closures should capture and resolve types correctly" + ); +} + +#[test] +fn closure_capture_used_in_binary_op() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + let x = 5 + let f = fn() -> i64 { return x + 1 } + return f() +} +"# + ), + "captured variable used in binary op should type-check" + ); +} + +#[test] +fn overflow_through_variable_binop_i8() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = 100 + return x + 28 +} +"# + ), + "100 + 28 = 128 overflows i8, should be rejected" + ); +} + +#[test] +fn variable_binop_fits_i8() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = 50 + return x + 20 +} +"# + ), + "50 + 20 = 70 fits i8, should pass" + ); +} + +#[test] +fn overflow_variable_on_right_side() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let y = 28 + return 100 + y +} +"# + ), + "100 + 28 = 128 overflows i8 with variable on right" + ); +} + +#[test] +fn overflow_both_variables_binop() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let a = 100 + let b = 28 + return a + b +} +"# + ), + "100 + 28 = 128 overflows i8 with both operands as variables" + ); +} 
+ +#[test] +fn overflow_through_chained_variable_binop() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = 100 + let y = x + return y + 28 +} +"# + ), + "chained copy 100 + 28 = 128 overflows i8" + ); +} + +#[test] +fn variable_subtraction_overflow_i8() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = -100 + return x - 29 +} +"# + ), + "-100 - 29 = -129 overflows i8 (min is -128)" + ); +} + +#[test] +fn variable_multiplication_overflow_i8() { + assert!( + !sema_ok( + r#" +fn f() -> i8 { + let x = 20 + return x * 7 +} +"# + ), + "20 * 7 = 140 overflows i8" + ); +} + +#[test] +fn variable_binop_fits_i16() { + assert!( + sema_ok( + r#" +fn f() -> i16 { + let x = 10000 + return x + 5000 +} +"# + ), + "10000 + 5000 = 15000 fits i16, should pass" + ); +} + +#[test] +fn variable_binop_overflow_i16() { + assert!( + !sema_ok( + r#" +fn f() -> i16 { + let x = 30000 + return x + 3000 +} +"# + ), + "30000 + 3000 = 33000 overflows i16 (max 32767)" + ); +} + +#[test] +fn negative_variable_binop_fits_i8() { + assert!( + sema_ok( + r#" +fn f() -> i8 { + let x = -100 + return x + 10 +} +"# + ), + "-100 + 10 = -90 fits i8, should pass" + ); +} + +#[test] +fn let_shadowing_function_name_then_call_rejected() { + assert!( + !sema_ok( + r#" +fn outer() -> i64 { + return 42 +} + +fn test() { + let outer = 99 + let x = outer() +} +"# + ), + "calling a variable that shadows a function should be a type error" + ); +} + +#[test] +fn let_shadowing_function_name_without_call_is_valid() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + return 42 +} + +fn test() { + let outer = 99 +} +"# + ), + "shadowing a function name with a let without calling should be valid" + ); +} + +#[test] +fn nested_fn_same_name_in_different_parents_independent() { + assert!( + sema_ok( + r#" +fn outer() { + fn inner() -> i64 { return 1 } + let x = inner() +} + +fn test() { + fn inner() -> i64 { return 2 } + let x = inner() +} +"# + ), + "same-named nested functions in different parents should be 
independent" + ); +} + +#[test] +fn nested_fn_not_visible_outside_parent() { + assert!( + !sema_ok( + r#" +fn outer() { + fn inner() -> i64 { return 1 } +} + +fn test() { + let x = inner() +} +"# + ), + "nested function should not be visible outside its parent" + ); +} + +#[test] +fn nested_fn_callable_from_within_parent() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + fn helper() -> i64 { return 42 } + return helper() +} +"# + ), + "nested function should be callable from within its parent" + ); +} + +#[test] +fn multiple_errors_do_not_compound_through_rollback() { + let count = sema_error_count( + r#" +fn f(x: i64) -> i64 { + let a = x + "bad" + let b = x + true + return x +} +"#, + ); + assert!( + count <= 4, + "two independent type errors should not compound, got {}", + count + ); +} + +#[test] +fn struct_field_unknown_type_rejected() { + assert!( + !sema_ok( + r#" +struct Foo { x: CompletelyMadeUpType } +fn main() { + let y = 42 +} +"# + ), + "struct with nonexistent field type should be rejected even if unused" + ); +} + +#[test] +fn struct_field_valid_primitive_types_accepted() { + assert!( + sema_ok( + r#" +struct Point { x: i64, y: i64 } +fn main() { + let p = Point { x: 1, y: 2 } +} +"# + ), + "struct with valid primitive field types should pass" + ); +} + +#[test] +fn struct_field_references_other_struct() { + assert!( + sema_ok( + r#" +struct Inner { value: i64 } +struct Outer { child: Inner } +fn main() { + let i = Inner { value: 1 } +} +"# + ), + "struct referencing another struct should pass" + ); +} + +#[test] +fn struct_field_forward_reference_accepted() { + assert!( + sema_ok( + r#" +struct Outer { child: Inner } +struct Inner { value: i64 } +fn main() { + let i = Inner { value: 1 } +} +"# + ), + "struct forward-referencing a later struct should pass" + ); +} + +#[test] +fn generic_struct_field_type_param_accepted() { + assert!( + sema_ok( + r#" +struct Wrapper { value: T } +fn main() { + let w = Wrapper { value: 42 } +} +"# + ), + "generic 
struct with type param field should pass" + ); +} + +#[test] +fn struct_multiple_invalid_fields_all_reported() { + let count = sema_error_count( + r#" +struct Bad { a: FakeTypeA, b: FakeTypeB } +fn main() { + let y = 42 +} +"#, + ); + assert!( + count >= 2, + "struct with two invalid field types should produce at least 2 errors, got {}", + count + ); +} + +#[test] +fn struct_name_collides_with_type_param_mismatch_detected() { + assert!( + !sema_ok( + r#" +fn unrelated(x: T) -> T { return x } +struct T { value: i64 } +fn main() { + let x: T = 42 +} +"# + ), + "assigning i64 to struct T should be rejected despite T being a type param elsewhere" + ); +} + +#[test] +fn struct_name_collides_with_type_param_valid_usage() { + assert!( + sema_ok( + r#" +fn unrelated(x: T) -> T { return x } +struct T { value: i64 } +fn main() { + let x = T { value: 42 } +} +"# + ), + "constructing struct T should work despite T being a type param elsewhere" + ); +} + +#[test] +fn generic_fn_no_collision_still_works() { + assert!( + sema_ok( + r#" +fn identity(x: T) -> T { return x } +fn main() -> i64 { + return identity(42) +} +"# + ), + "generic function without struct name collision should work" + ); +} + +#[test] +fn return_stack_array_lowers_successfully() { + // Arrays are returned by value and should lower without errors. + assert!(air_pipeline_ok( + r#" +fn bar() -> [i64; 3] { + let arr = [1, 2, 3] + return arr +} +"#, + )); +} + +#[test] +fn return_stack_array_compiles() { + // Arrays can be returned by value; this should lower successfully. 
+ assert!(air_pipeline_ok( + r#" +fn baz() -> [i64; 2] { + let arr = [10, 20] + return arr +} +"#, + )); +} + +#[test] +fn constant_sized_array_in_let_compiles() { + assert!( + air_pipeline_ok( + r#" +fn f() -> i64 { + let arr = [1, 2, 3] + return arr[0] +} +"# + ), + "constant-sized array in let should compile through AIR" + ); +} + +#[test] +fn nested_fn_as_last_stmt_with_return_type_rejected() { + assert!( + !sema_ok( + r#" +fn outer() -> i64 { + fn inner() -> i64 { return 1 } +} +"# + ), + "nested function as last stmt in non-void function should be rejected" + ); +} + +#[test] +fn nested_fn_as_last_stmt_void_return_accepted() { + assert!( + sema_ok( + r#" +fn outer() { + fn inner() -> i64 { return 1 } +} +"# + ), + "nested function as last stmt in void function should be accepted" + ); +} + +#[test] +fn nested_fn_followed_by_return_accepted() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + fn inner() -> i64 { return 1 } + return inner() +} +"# + ), + "nested function followed by explicit return should be accepted" + ); +} + +#[test] +fn unify_prevents_var_cycle_through_resolution() { + // Var(A) unified with Var(B), then Var(B) unified with Var(A) + // should succeed (identity) without creating a cycle + assert!( + sema_ok( + r#" +fn f(a, b) { + let x = a + let y = b + let z: i64 = x + let w: i64 = y +} +"# + ), + "unifying vars that resolve to the same type should not cycle" + ); +} + +#[test] +fn compound_generic_does_not_create_infinite_type() { + assert!( + sema_ok( + r#" +fn wrap(x: T) -> T { + return x +} + +fn main() -> i64 { + let a = wrap(42) + let b = wrap(a) + return b +} +"# + ), + "chained generic calls should resolve without infinite type" + ); +} + +#[test] +fn self_referencing_let_is_undefined_variable() { + assert!( + !sema_ok( + r#" +fn test() { + let f = fn(x) { return f } +} +"# + ), + "self-referencing let should fail with undefined variable" + ); +} + +#[test] +fn var_chain_resolves_without_cycle() { + assert!( + sema_ok( + r#" +fn 
test() -> i64 { + let a = 1 + let b = a + let c = b + let d = c + return d +} +"# + ), + "long chain of variable copies should resolve without cycle" + ); +} + +#[test] +fn substitution_apply_resolves_nested_vars() { + assert!( + air_pipeline_ok( + r#" +fn identity(x: T) -> T { return x } + +fn main() -> i64 { + let a = identity(42) + let b = identity(a) + return b +} +"# + ), + "nested generic calls should resolve vars completely" + ); +} + +#[test] +fn type_mismatch_does_not_leave_corrupt_substitution() { + let count = sema_error_count( + r#" +fn f() -> i64 { + let x: i64 = "bad" + let y = 42 + return y +} +"#, + ); + assert!( + count >= 1 && count <= 2, + "type mismatch should produce errors without corrupting solver, got {}", + count + ); +} + +#[test] +fn error_recovery_does_not_corrupt_unrelated_compound_type() { + let count = sema_error_count( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { + return f(x) +} + +fn main() -> i64 { + let bad = "hello" + 42 + let g = fn(n: i64) -> i64 { return n + 1 } + return apply(g, 10) +} +"#, + ); + assert!( + count >= 1 && count <= 3, + "error in one expression should not corrupt function type inference, got {}", + count + ); +} + +#[test] +fn inner_vars_in_function_type_resolve_through_finalization() { + assert!( + air_pipeline_ok( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { + return f(x) +} + +fn main() -> i64 { + let inc = fn(n: i64) -> i64 { return n + 1 } + return apply(inc, 5) +} +"# + ), + "function type params should resolve through full pipeline" + ); +} + +#[test] +fn force_dynamic_on_error_preserves_valid_inference_elsewhere() { + let count = sema_error_count( + r#" +fn f(x) { + let y: i64 = x + let z = y + "hello" + let w: i64 = 42 + return w +} +"#, + ); + assert!( + count >= 1 && count <= 3, + "type error should not corrupt unrelated bindings, got {}", + count + ); +} + +#[test] +fn recursion_limit_orphan_constraints_do_not_cause_miscompilation() { + // even if orphan constraints exist from 
partial inference, + // the RecursionLimit error makes the program fail at sema + assert!( + sema_ok( + r#" +fn f() -> i64 { + return 1 + 2 + 3 +} +"# + ), + "normal expressions should not trigger recursion limit" + ); +} + +#[test] +fn recursion_limit_does_not_prevent_other_function_inference() { + assert!( + sema_ok( + r#" +fn simple() -> i64 { + return 42 +} +"# + ), + "normal functions should still compile" + ); +} + +#[test] +fn lambda_calling_nested_function_in_outer_scope() { + assert!( + air_pipeline_ok( + r#" +fn outer() -> i64 { + fn helper() -> i64 { return 42 } + let lambda = fn() -> i64 { return helper() } + return helper() +} +"# + ), + "lambda calling a sibling function should compile through AIR" + ); +} + +#[test] +fn lambda_calling_toplevel_function() { + assert!( + air_pipeline_ok( + r#" +fn helper() -> i64 { return 10 } + +fn main() -> i64 { + let f = fn() -> i64 { return helper() } + return helper() +} +"# + ), + "lambda calling a top-level function should compile through AIR" + ); +} + +#[test] +fn array_literal_narrowing_tracked_variable() { + assert!( + sema_ok( + r#" +fn f() -> [i32; 3] { + let x = 100 + return [x, 1, 2] +} +"# + ), + "tracked variable in array literal should narrow to i32" + ); +} + +#[test] +fn array_literal_narrowing_all_literals() { + assert!( + sema_ok( + r#" +fn f() -> [i32; 3] { + return [1, 2, 3] +} +"# + ), + "all-literal array should narrow to i32" + ); +} + +#[test] +fn array_literal_narrowing_untracked_param_rejected() { + assert!( + !sema_ok( + r#" +fn f(y: i64) -> [i32; 3] { + return [y, 1, 2] +} +"# + ), + "i64 parameter in i32 array should be rejected" + ); +} + +#[test] +fn array_literal_narrowing_mixed_tracked_and_literal() { + assert!( + sema_ok( + r#" +fn f() -> [i32; 4] { + let a = 10 + let b = 20 + return [a, b, 30, 40] +} +"# + ), + "mix of tracked variables and literals should narrow" + ); +} diff --git a/aelys/tests/avbc_manifest_tests.rs b/aelys/tests/avbc_manifest_tests.rs deleted file mode 100644 
index ed1f665..0000000 --- a/aelys/tests/avbc_manifest_tests.rs +++ /dev/null @@ -1,15 +0,0 @@ -use aelys_bytecode::asm::binary::{deserialize_with_manifest, serialize_with_manifest}; -use aelys_runtime::{Function, Heap}; - -#[test] -fn avbc_round_trip_manifest() { - let func = Function::new(None, 0); - let heap = Heap::new(); - let manifest = b"[build]\nbundle_native_modules = true\n".to_vec(); - - let bytes = serialize_with_manifest(&func, &heap, Some(&manifest), None); - let (_func, _heap, decoded_manifest, _bundles) = - deserialize_with_manifest(&bytes).expect("read"); - - assert_eq!(decoded_manifest.unwrap(), manifest); -} diff --git a/aelys/tests/benchmarks/bench_call_overhead.aelys b/aelys/tests/benchmarks/bench_call_overhead.aelys deleted file mode 100644 index d1283c1..0000000 --- a/aelys/tests/benchmarks/bench_call_overhead.aelys +++ /dev/null @@ -1,10 +0,0 @@ -// Pure call overhead benchmark -// Simple function that just returns, to measure call/return cost - -fn noop() -> int { return 42 } - -let mut sum = 0 -for i in 0..50000000 { - sum = sum + noop() -} -sum diff --git a/aelys/tests/benchmarks/bench_fib35_typed.aelys b/aelys/tests/benchmarks/bench_fib35_typed.aelys deleted file mode 100644 index f0c1b9b..0000000 --- a/aelys/tests/benchmarks/bench_fib35_typed.aelys +++ /dev/null @@ -1,9 +0,0 @@ -// Fibonacci 35 - typed version -fn fib(n: int) -> int { - if n < 2 { - return n; - } - return fib(n - 1) + fib(n - 2); -} - -fib(35) diff --git a/aelys/tests/benchmarks/bench_global_calls.aelys b/aelys/tests/benchmarks/bench_global_calls.aelys deleted file mode 100644 index 4920bd8..0000000 --- a/aelys/tests/benchmarks/bench_global_calls.aelys +++ /dev/null @@ -1,16 +0,0 @@ -// Benchmark: Many global function calls in a loop -// Tests cache efficiency for repeated calls to same global - -fn add(a: int, b: int) -> int { return a + b } -fn sub(a: int, b: int) -> int { return a - b } -fn mul(a: int, b: int) -> int { return a * b } - -// Call these functions many 
times in a loop -let mut result = 0 -for i in 0..10000000 { - result = add(result, 1) - result = sub(result, 1) - result = add(result, mul(2, 3)) - result = sub(result, 6) -} -result diff --git a/aelys/tests/benchmarks/bench_inline_cache.aelys b/aelys/tests/benchmarks/bench_inline_cache.aelys deleted file mode 100644 index bd7d2c4..0000000 --- a/aelys/tests/benchmarks/bench_inline_cache.aelys +++ /dev/null @@ -1,24 +0,0 @@ -needs sqrt from std.math - -fn compute(x: int) -> int { - return x * 2 + 1 -} - -fn process(a: int, b: int) -> int { - return a + b -} - -let ptr = alloc(1) -store(ptr, 0, 0) - -let mut result = 0 - -for i in 0..5000000 { - let v = load(ptr, 0) - let c = compute(v) - let p = process(c, i) - store(ptr, 0, p % 1000) - result = result + 1 -} - -result diff --git a/aelys/tests/benchmarks/bench_mutual_recursion.aelys b/aelys/tests/benchmarks/bench_mutual_recursion.aelys deleted file mode 100644 index 163498f..0000000 --- a/aelys/tests/benchmarks/bench_mutual_recursion.aelys +++ /dev/null @@ -1,20 +0,0 @@ -// Mutual recursion benchmark - is_even/is_odd -// Each function calls a different global, testing cache separation - -fn is_even(n: int) -> bool { - if n == 0 { return true } - return is_odd(n - 1) -} - -fn is_odd(n: int) -> bool { - if n == 0 { return false } - return is_even(n - 1) -} - -// Call both functions many times -let mut result = 0 -for i in 0..1000000 { - if is_even(20) { result = result + 1 } - if is_odd(21) { result = result + 1 } -} -result diff --git a/aelys/tests/benchmarks/fib35.py b/aelys/tests/benchmarks/fib35.py deleted file mode 100644 index 8bfb102..0000000 --- a/aelys/tests/benchmarks/fib35.py +++ /dev/null @@ -1,6 +0,0 @@ -def fib(n): - if n < 2: - return n - return fib(n - 1) + fib(n - 2) - -print(fib(35)) diff --git a/aelys/tests/bitwise_tests.rs b/aelys/tests/bitwise_tests.rs deleted file mode 100644 index d5562a3..0000000 --- a/aelys/tests/bitwise_tests.rs +++ /dev/null @@ -1,488 +0,0 @@ -use 
aelys::run_with_config_and_opt; -use aelys_opt::OptimizationLevel; -use aelys_runtime::{Value, VmConfig}; - -fn run(code: &str) -> Value { - run_with_config_and_opt( - code, - "", - VmConfig::default(), - Vec::new(), - OptimizationLevel::None, - ) - .expect("Code should execute successfully") -} - -fn run_with_opt(code: &str, level: OptimizationLevel) -> Value { - run_with_config_and_opt(code, "", VmConfig::default(), Vec::new(), level) - .expect("Code should execute successfully") -} - -fn run_fails(code: &str) -> bool { - run_with_config_and_opt( - code, - "", - VmConfig::default(), - Vec::new(), - OptimizationLevel::None, - ) - .is_err() -} - -#[test] -fn test_left_shift() { - let result = run("5 << 2"); - assert_eq!(result.as_int(), Some(20)); // 5 * 4 = 20 -} - -#[test] -fn test_right_shift() { - let result = run("20 >> 2"); - assert_eq!(result.as_int(), Some(5)); // 20 / 4 = 5 -} - -#[test] -fn test_right_shift_negative() { - // Arithmetic right shift preserves sign - let result = run("-8 >> 2"); - assert_eq!(result.as_int(), Some(-2)); -} - -#[test] -fn test_bitwise_and() { - let result = run("12 & 10"); - // 12 = 1100, 10 = 1010, AND = 1000 = 8 - assert_eq!(result.as_int(), Some(8)); -} - -#[test] -fn test_bitwise_or() { - let result = run("12 | 10"); - // 12 = 1100, 10 = 1010, OR = 1110 = 14 - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_bitwise_xor() { - let result = run("12 ^ 10"); - // 12 = 1100, 10 = 1010, XOR = 0110 = 6 - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_bitwise_not() { - let result = run("~5"); - // Two's complement: ~5 = -6 - assert_eq!(result.as_int(), Some(-6)); -} - -#[test] -fn test_bitwise_not_negative() { - let result = run("~(-6)"); - // ~(-6) = 5 - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_shift_by_zero() { - let result = run("42 << 0"); - assert_eq!(result.as_int(), Some(42)); - - let result = run("42 >> 0"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn 
test_bitwise_with_zero() { - let result = run("0 & 255"); - assert_eq!(result.as_int(), Some(0)); - - let result = run("0 | 255"); - assert_eq!(result.as_int(), Some(255)); - - let result = run("0 ^ 255"); - assert_eq!(result.as_int(), Some(255)); -} - -#[test] -fn test_bitwise_identity() { - let result = run("255 & 255"); - assert_eq!(result.as_int(), Some(255)); - - let result = run("255 | 255"); - assert_eq!(result.as_int(), Some(255)); - - let result = run("255 ^ 255"); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_not_not_identity() { - let result = run("~~42"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_precedence_shift_over_and() { - // << and >> have higher precedence than & - let result = run("1 << 4 & 16"); - // Should be (1 << 4) & 16 = 16 & 16 = 16 - assert_eq!(result.as_int(), Some(16)); -} - -#[test] -fn test_precedence_and_over_xor() { - // & has higher precedence than ^ - let result = run("15 & 7 ^ 3"); - // Should be (15 & 7) ^ 3 = 7 ^ 3 = 4 - assert_eq!(result.as_int(), Some(4)); -} - -#[test] -fn test_precedence_xor_over_or() { - // ^ has higher precedence than | - let result = run("8 ^ 4 | 2"); - // Should be (8 ^ 4) | 2 = 12 | 2 = 14 - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_precedence_not_highest() { - // ~ has highest precedence - let result = run("~0 & 255"); - // Should be (~0) & 255 = -1 & 255 = 255 - assert_eq!(result.as_int(), Some(255)); -} - -#[test] -fn test_compound_bit_flags() { - // Setting multiple bits - let result = run("(1 << 0) | (1 << 2) | (1 << 4)"); - // 1 + 4 + 16 = 21 - assert_eq!(result.as_int(), Some(21)); -} - -#[test] -fn test_mask_and_shift() { - // Extract middle byte - let result = run("(0xFF00 >> 8) & 0xFF"); - assert_eq!(result.as_int(), Some(255)); -} - -#[test] -fn test_toggle_bits() { - // Toggle bits using XOR - let result = run("0b1010 ^ 0b1111"); - // 10 ^ 15 = 5 - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn 
test_bitwise_with_variables() { - let code = r#" - let x = 5 - let shift = 2 - x << shift - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(20)); -} - -#[test] -fn test_bitwise_mask_with_variables() { - let code = r#" - let value = 0xFF - let mask = 0x0F - value & mask - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_constant_fold_left_shift() { - let result = run_with_opt("5 << 2", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(20)); -} - -#[test] -fn test_constant_fold_right_shift() { - let result = run_with_opt("20 >> 2", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_constant_fold_and() { - let result = run_with_opt("12 & 10", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(8)); -} - -#[test] -fn test_constant_fold_or() { - let result = run_with_opt("12 | 10", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_constant_fold_xor() { - let result = run_with_opt("12 ^ 10", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_constant_fold_not() { - let result = run_with_opt("~5", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(-6)); -} - -#[test] -fn test_constant_fold_complex() { - // Complex expression that should be fully folded - let result = run_with_opt("(1 << 4) | (1 << 2) | (1 << 0)", OptimizationLevel::Basic); - // 16 + 4 + 1 = 21 - assert_eq!(result.as_int(), Some(21)); -} - -#[test] -fn test_constant_fold_nested() { - let result = run_with_opt("~(~5)", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_optimization_preserves_bitwise_semantics() { - let code = "5 << 2"; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o1 = run_with_opt(code, OptimizationLevel::Basic); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - let result_o3 = 
run_with_opt(code, OptimizationLevel::Aggressive); - - assert_eq!(result_o0.as_int(), Some(20)); - assert_eq!(result_o1.as_int(), Some(20)); - assert_eq!(result_o2.as_int(), Some(20)); - assert_eq!(result_o3.as_int(), Some(20)); -} - -#[test] -fn test_optimization_preserves_complex_bitwise() { - let code = "(255 & 15) | (240 & 255)"; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - - // 15 | 240 = 255 - assert_eq!(result_o0.as_int(), Some(255)); - assert_eq!(result_o2.as_int(), Some(255)); -} - -#[test] -fn test_float_left_shift_fails() { - assert!(run_fails("1.5 << 2")); -} - -#[test] -fn test_float_right_shift_fails() { - assert!(run_fails("1.5 >> 2")); -} - -#[test] -fn test_float_bitwise_and_fails() { - assert!(run_fails("1.5 & 2")); -} - -#[test] -fn test_float_bitwise_or_fails() { - assert!(run_fails("1.5 | 2")); -} - -#[test] -fn test_float_bitwise_xor_fails() { - assert!(run_fails("1.5 ^ 2")); -} - -#[test] -fn test_float_bitwise_not_fails() { - assert!(run_fails("~1.5")); -} - -#[test] -fn test_float_on_right_side_fails() { - assert!(run_fails("5 << 2.0")); -} - -#[test] -fn test_both_floats_fail() { - assert!(run_fails("1.5 & 2.5")); -} - -#[test] -fn test_bitwise_in_function() { - let code = r#" - fn set_bit(value: int, bit: int) -> int { - value | (1 << bit) - } - set_bit(0, 3) - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(8)); // 1 << 3 = 8 -} - -#[test] -fn test_bitwise_in_function_clear_bit() { - let code = r#" - fn clear_bit(value: int, bit: int) -> int { - value & ~(1 << bit) - } - clear_bit(15, 1) - "#; - let result = run(code); - // 15 = 1111, clear bit 1 -> 1101 = 13 - assert_eq!(result.as_int(), Some(13)); -} - -#[test] -fn test_bitwise_in_function_toggle_bit() { - let code = r#" - fn toggle_bit(value: int, bit: int) -> int { - value ^ (1 << bit) - } - toggle_bit(10, 2) - "#; - let result = run(code); - // 10 = 1010, toggle bit 2 -> 1110 = 
14 - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_bitwise_in_function_check_bit() { - let code = r#" - fn has_bit(value: int, bit: int) -> bool { - (value & (1 << bit)) != 0 - } - has_bit(10, 1) - "#; - let result = run(code); - // 10 = 1010, bit 1 is set - assert_eq!(result.as_bool(), Some(true)); -} - -// ============================================================================= -// Edge Cases - Shift Amount Wrap-Around (branchless behavior with & 63) -// ============================================================================= - -#[test] -fn test_negative_shift_wraps() { - // Negative shift amount wraps around (& 63) - // -1 & 63 = 63 (in two's complement, -1 has all bits set) - let code = r#" - let n = -1 - 1 << n - "#; - let result = run(code); - // Should be 1 << 63 (wrapped) - this overflows to negative in i64 - assert!(result.as_int().is_some()); -} - -#[test] -fn test_large_shift_wraps() { - // Shift by 64 wraps to 0: 64 & 63 = 0 - let code = r#" - let n = 64 - 1 << n - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(1)); // 64 & 63 = 0, so 1 << 0 = 1 -} - -#[test] -fn test_large_right_shift_wraps() { - // 66 & 63 = 2, so 8 >> 66 = 8 >> 2 = 2 - let code = r#" - let n = 66 - 8 >> n - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(2)); // 66 & 63 = 2, so 8 >> 2 = 2 -} - -#[test] -fn test_shift_by_63_succeeds() { - // Shift by 63 (max before wrap) should succeed - let code = r#" - let n = 63 - 1 << n - "#; - let result = run(code); - // 1 << 63 = i64::MIN (sign bit set) - assert!(result.as_int().is_some()); -} - -#[test] -fn test_shift_by_46_succeeds() { - // Shift by 46 should succeed and give a positive result - let code = r#" - let n = 46 - 1 << n - "#; - let result = run(code); - assert_eq!(result.as_int(), Some(1_i64 << 46)); -} - -// ============================================================================= -// Edge Cases - String Type Errors (compile-time) -// 
============================================================================= - -#[test] -fn test_string_bitwise_and_fails() { - assert!(run_fails("\"hello\" & 5")); -} - -#[test] -fn test_string_bitwise_or_fails() { - assert!(run_fails("\"hello\" | 5")); -} - -#[test] -fn test_string_bitwise_xor_fails() { - assert!(run_fails("\"hello\" ^ 5")); -} - -#[test] -fn test_string_left_shift_fails() { - assert!(run_fails("\"hello\" << 2")); -} - -#[test] -fn test_string_right_shift_fails() { - assert!(run_fails("\"hello\" >> 2")); -} - -#[test] -fn test_string_bitwise_not_fails() { - assert!(run_fails("~\"hello\"")); -} - -// ============================================================================= -// Edge Cases - Boolean Type Errors (compile-time) -// ============================================================================= - -#[test] -fn test_bool_bitwise_and_fails() { - assert!(run_fails("true & 5")); -} - -#[test] -fn test_bool_bitwise_or_fails() { - assert!(run_fails("true | 5")); -} - -#[test] -fn test_bool_left_shift_fails() { - assert!(run_fails("true << 2")); -} - -#[test] -fn test_bool_bitwise_not_fails() { - assert!(run_fails("~true")); -} diff --git a/aelys/tests/builtins_tests.rs b/aelys/tests/builtins_tests.rs deleted file mode 100644 index d8594c0..0000000 --- a/aelys/tests/builtins_tests.rs +++ /dev/null @@ -1,38 +0,0 @@ -use aelys_runtime::{VM, Value, builtin_type, register_builtins}; -use aelys_syntax::Source; - -fn make_test_vm() -> VM { - let source = Source::new("test.aelys".to_string(), "".to_string()); - VM::new(source).unwrap() -} - -#[test] -fn test_register_builtins() { - let mut vm = make_test_vm(); - register_builtins(&mut vm).unwrap(); - - // Verify core built-ins are registered as globals - assert!(vm.get_global("type").is_some()); - assert!(vm.get_global("alloc").is_some()); - assert!(vm.get_global("free").is_some()); - assert!(vm.get_global("load").is_some()); - assert!(vm.get_global("store").is_some()); -} - -#[test] -fn 
test_builtin_type() { - let mut vm = make_test_vm(); - - // Test type() with different value types - let result = builtin_type(&mut vm, &[Value::int(42)]).unwrap(); - assert!(result.is_ptr()); // Returns a string - - let result = builtin_type(&mut vm, &[Value::float(2.72)]).unwrap(); - assert!(result.is_ptr()); - - let result = builtin_type(&mut vm, &[Value::bool(true)]).unwrap(); - assert!(result.is_ptr()); - - let result = builtin_type(&mut vm, &[Value::null()]).unwrap(); - assert!(result.is_ptr()); -} diff --git a/aelys/tests/byte_buffer_tests.rs b/aelys/tests/byte_buffer_tests.rs deleted file mode 100644 index 6607c74..0000000 --- a/aelys/tests/byte_buffer_tests.rs +++ /dev/null @@ -1,644 +0,0 @@ -mod common; -use common::{ - assert_aelys_bool, assert_aelys_int, assert_aelys_null, assert_aelys_str, run_aelys_err, - run_aelys_ok, -}; - -#[test] -fn test_bytes_alloc_and_size() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(100) - bytes.size(buf) - "#, - 100, - ); -} - -#[test] -fn test_bytes_alloc_zero_fails() { - let err = run_aelys_err( - r#" - needs std.bytes - bytes.alloc(0) - "#, - ); - assert!( - err.contains("positive") || err.contains("must be"), - "Expected error about positive size, got: {}", - err - ); -} - -#[test] -fn test_bytes_alloc_negative_fails() { - let err = run_aelys_err( - r#" - needs std.bytes - bytes.alloc(-10) - "#, - ); - assert!( - err.contains("positive") || err.contains("must be"), - "Expected error about positive size, got: {}", - err - ); -} - -#[test] -fn test_bytes_free() { - assert_aelys_null( - r#" - needs std.bytes - let buf = bytes.alloc(100) - bytes.free(buf) - "#, - ); -} - -#[test] -fn test_bytes_free_null_is_noop() { - assert_aelys_null( - r#" - needs std.bytes - bytes.free(null) - "#, - ); -} - -#[test] -fn test_bytes_read_write_u8() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 42) - bytes.read_u8(buf, 0) - "#, - 42, - ); -} - -#[test] -fn 
test_bytes_u8_range() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 0) - bytes.write_u8(buf, 1, 255) - bytes.read_u8(buf, 0) + bytes.read_u8(buf, 1) - "#, - 255, - ); -} - -#[test] -fn test_bytes_u8_out_of_range() { - let err = run_aelys_err( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 256) - "#, - ); - assert!( - err.contains("range") || err.contains("0, 255"), - "Expected error about u8 range, got: {}", - err - ); -} - -#[test] -fn test_bytes_u8_out_of_bounds() { - let err = run_aelys_err( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.read_u8(buf, 10) - "#, - ); - assert!( - err.contains("offset") && err.contains("size"), - "Expected bounds error, got: {}", - err - ); -} - -#[test] -fn test_bytes_read_write_u16() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u16(buf, 0, 12345) - bytes.read_u16(buf, 0) - "#, - 12345, - ); -} - -#[test] -fn test_bytes_u16_little_endian() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u16(buf, 0, 258) - bytes.read_u8(buf, 0) - "#, - 2, - ); -} - -#[test] -fn test_bytes_read_write_u32() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u32(buf, 0, 123456789) - bytes.read_u32(buf, 0) - "#, - 123456789, - ); -} - -#[test] -fn test_bytes_u32_out_of_bounds() { - let err = run_aelys_err( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.read_u32(buf, 8) - "#, - ); - assert!( - err.contains("exceeds") || err.contains("offset"), - "Expected bounds error, got: {}", - err - ); -} - -#[test] -fn test_bytes_read_write_u64() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(16) - bytes.write_u64(buf, 0, 140737488355327) - bytes.read_u64(buf, 0) - "#, - 140737488355327_i64, - ); -} - -#[test] -#[allow(clippy::approx_constant)] -fn test_bytes_read_write_f32() { - let result = run_aelys_ok( - r#" - 
needs std.bytes - let buf = bytes.alloc(10) - bytes.write_f32(buf, 0, 3.14) - bytes.read_f32(buf, 0) - "#, - ); - let val = result.as_float().expect("Expected float result"); - assert!( - (val - 3.14).abs() < 0.01, - "Expected value close to 3.14, got {}", - val - ); -} - -#[test] -fn test_bytes_read_write_f64() { - let result = run_aelys_ok( - r#" - needs std.bytes - let buf = bytes.alloc(16) - bytes.write_f64(buf, 0, 3.141592653589793) - bytes.read_f64(buf, 0) - "#, - ); - let val = result.as_float().expect("Expected float result"); - assert!( - (val - std::f64::consts::PI).abs() < 1e-15, - "Expected value close to PI, got {}", - val - ); -} - -#[test] -fn test_bytes_fill() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.fill(buf, 2, 5, 42) - bytes.read_u8(buf, 0) + bytes.read_u8(buf, 2) + bytes.read_u8(buf, 6) - "#, - 84, - ); -} - -#[test] -fn test_bytes_copy_same_buffer() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(20) - bytes.write_u8(buf, 0, 11) - bytes.write_u8(buf, 1, 22) - bytes.write_u8(buf, 2, 33) - bytes.copy(buf, 0, buf, 10, 3) - bytes.read_u8(buf, 10) + bytes.read_u8(buf, 11) + bytes.read_u8(buf, 12) - "#, - 66, - ); -} - -#[test] -fn test_bytes_copy_different_buffers() { - assert_aelys_int( - r#" - needs std.bytes - let src = bytes.alloc(10) - let dst = bytes.alloc(10) - bytes.write_u8(src, 0, 100) - bytes.write_u8(src, 1, 200) - bytes.copy(src, 0, dst, 5, 2) - bytes.read_u8(dst, 5) + bytes.read_u8(dst, 6) - "#, - 300, - ); -} - -#[test] -fn test_bytes_invalid_handle() { - let err = run_aelys_err( - r#" - needs std.bytes - bytes.read_u8(99999, 0) - "#, - ); - assert!( - err.contains("invalid") || err.contains("handle"), - "Expected invalid handle error, got: {}", - err - ); -} - -#[test] -fn test_bytes_use_after_free() { - let err = run_aelys_err( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.free(buf) - bytes.read_u8(buf, 0) - "#, - ); - assert!( - err.contains("invalid") || 
err.contains("handle"), - "Expected invalid handle error after free, got: {}", - err - ); -} - -#[test] -fn test_bytes_negative_offset() { - let err = run_aelys_err( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.read_u8(buf, -1) - "#, - ); - assert!( - err.contains("negative"), - "Expected error about negative offset, got: {}", - err - ); -} - -#[test] -fn test_bytes_resize() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 42) - bytes.resize(buf, 20) - bytes.size(buf) - "#, - 20, - ); -} - -#[test] -fn test_bytes_resize_preserves_data() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 42) - bytes.resize(buf, 20) - bytes.read_u8(buf, 0) - "#, - 42, - ); -} - -#[test] -fn test_bytes_clone() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_u8(buf, 0, 42) - let buf2 = bytes.clone(buf) - bytes.write_u8(buf, 0, 0) - bytes.read_u8(buf2, 0) - "#, - 42, - ); -} - -#[test] -fn test_bytes_equals() { - assert_aelys_bool( - r#" - needs std.bytes - let a = bytes.alloc(4) - let b = bytes.alloc(4) - bytes.write_u32(a, 0, 12345) - bytes.write_u32(b, 0, 12345) - bytes.equals(a, b) - "#, - true, - ); -} - -#[test] -fn test_bytes_equals_different() { - assert_aelys_bool( - r#" - needs std.bytes - let a = bytes.alloc(4) - let b = bytes.alloc(4) - bytes.write_u32(a, 0, 12345) - bytes.write_u32(b, 0, 54321) - bytes.equals(a, b) - "#, - false, - ); -} - -#[test] -fn test_bytes_signed_i8() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(4) - bytes.write_i8(buf, 0, -128) - bytes.write_i8(buf, 1, 127) - bytes.read_i8(buf, 0) + bytes.read_i8(buf, 1) - "#, - -1, - ); -} - -#[test] -fn test_bytes_signed_i16() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(4) - bytes.write_i16(buf, 0, -32768) - bytes.read_i16(buf, 0) - "#, - -32768, - ); -} - -#[test] -fn test_bytes_signed_i32() { - assert_aelys_int( 
- r#" - needs std.bytes - let buf = bytes.alloc(8) - bytes.write_i32(buf, 0, -2147483648) - bytes.read_i32(buf, 0) - "#, - -2147483648, - ); -} - -#[test] -fn test_bytes_big_endian_u16() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(4) - bytes.write_u16_be(buf, 0, 0x1234) - bytes.read_u8(buf, 0) - "#, - 0x12, - ); -} - -#[test] -fn test_bytes_big_endian_u32() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(8) - bytes.write_u32_be(buf, 0, 0x12345678) - bytes.read_u8(buf, 0) - "#, - 0x12, - ); -} - -#[test] -fn test_bytes_big_endian_roundtrip() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(8) - bytes.write_u32_be(buf, 0, 0xDEADBEEF) - bytes.read_u32_be(buf, 0) - "#, - 0xDEADBEEF_u32 as i64, - ); -} - -#[test] -fn test_bytes_from_string() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.from_string("Hello") - bytes.size(buf) - "#, - 5, - ); -} - -#[test] -fn test_bytes_from_string_content() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.from_string("ABC") - bytes.read_u8(buf, 0) + bytes.read_u8(buf, 1) + bytes.read_u8(buf, 2) - "#, - 65 + 66 + 67, - ); -} - -#[test] -fn test_bytes_decode() { - assert_aelys_str( - r#" - needs std.bytes - let buf = bytes.alloc(5) - bytes.write_u8(buf, 0, 72) - bytes.write_u8(buf, 1, 101) - bytes.write_u8(buf, 2, 108) - bytes.write_u8(buf, 3, 108) - bytes.write_u8(buf, 4, 111) - bytes.decode(buf, 0, 5) - "#, - "Hello", - ); -} - -#[test] -fn test_bytes_write_string() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(10) - bytes.write_string(buf, 0, "Hi") - bytes.read_u8(buf, 0) + bytes.read_u8(buf, 1) - "#, - 72 + 105, - ); -} - -#[test] -fn test_bytes_write_string_returns_length() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(20) - bytes.write_string(buf, 0, "Hello, World!") - "#, - 13, - ); -} - -#[test] -fn test_bytes_find() { - assert_aelys_int( - r#" - needs std.bytes - let buf = 
bytes.from_string("Hello, World!") - bytes.find(buf, 0, -1, 111) - "#, - 4, - ); -} - -#[test] -fn test_bytes_find_not_found() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.from_string("Hello") - bytes.find(buf, 0, -1, 120) - "#, - -1, - ); -} - -#[test] -fn test_bytes_find_with_range() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.from_string("abcabc") - bytes.find(buf, 3, 6, 97) - "#, - 3, - ); -} - -#[test] -fn test_bytes_reverse() { - assert_aelys_str( - r#" - needs std.bytes - let buf = bytes.from_string("Hello") - bytes.reverse(buf, 0, 5) - bytes.decode(buf, 0, 5) - "#, - "olleH", - ); -} - -#[test] -fn test_bytes_swap() { - assert_aelys_int( - r#" - needs std.bytes - let buf = bytes.alloc(4) - bytes.write_u8(buf, 0, 10) - bytes.write_u8(buf, 3, 40) - bytes.swap(buf, 0, 3) - bytes.read_u8(buf, 0) + bytes.read_u8(buf, 3) - "#, - 50, - ); -} - -#[test] -#[allow(clippy::approx_constant)] -fn test_bytes_f32_big_endian() { - let result = run_aelys_ok( - r#" - needs std.bytes - let buf = bytes.alloc(8) - bytes.write_f32_be(buf, 0, 3.14) - bytes.read_f32_be(buf, 0) - "#, - ); - let val = result.as_float().expect("Expected float result"); - assert!( - (val - 3.14).abs() < 0.01, - "Expected value close to 3.14, got {}", - val - ); -} - -#[test] -fn test_bytes_f64_big_endian() { - let result = run_aelys_ok( - r#" - needs std.bytes - let buf = bytes.alloc(16) - bytes.write_f64_be(buf, 0, 3.141592653589793) - bytes.read_f64_be(buf, 0) - "#, - ); - let val = result.as_float().expect("Expected float result"); - assert!( - (val - std::f64::consts::PI).abs() < 1e-15, - "Expected value close to PI, got {}", - val - ); -} diff --git a/aelys/tests/cast_tests.rs b/aelys/tests/cast_tests.rs deleted file mode 100644 index db33fb7..0000000 --- a/aelys/tests/cast_tests.rs +++ /dev/null @@ -1,53 +0,0 @@ -mod common; -use common::*; - -#[test] -fn int_as_int() { - assert_aelys_int("42 as i32", 42); -} - -#[test] -fn int_as_float() { - let result = 
run_aelys("42 as f64"); - assert!(result.as_float().is_some() || result.as_int().is_some()); -} - -#[test] -fn float_as_int() { - let result = run_aelys("3.14 as i32"); - assert!(result.as_int().is_some() || result.as_float().is_some()); -} - -#[test] -fn bool_as_int() { - let result = run_aelys("true as i32"); - assert!(result.as_bool() == Some(true) || result.as_int() == Some(1)); -} - -#[test] -fn int_as_bool() { - let result = run_aelys("1 as bool"); - assert!(result.as_int() == Some(1) || result.as_bool() == Some(true)); -} - -#[test] -fn chained_cast() { - let result = run_aelys("42 as i32 as f64"); - assert!(result.as_int().is_some() || result.as_float().is_some()); -} - -#[test] -fn cast_in_expression() { - assert_aelys_int("let x = 10\nx as i32 + 5", 15); -} - -#[test] -fn cast_preserves_value() { - assert_aelys_int("100 as i64", 100); -} - -#[test] -fn invalid_cast_string_to_int_is_compile_error() { - let result = run_aelys_result(r#""hello" as i32"#); - assert!(result.is_err()); -} diff --git a/aelys/tests/closure_tests.rs b/aelys/tests/closure_tests.rs deleted file mode 100644 index 4144688..0000000 --- a/aelys/tests/closure_tests.rs +++ /dev/null @@ -1,497 +0,0 @@ -use aelys::run; -use aelys_runtime::Value; - -/// Helper to run code and expect success -fn run_ok(source: &str) -> Value { - run(source, "test.aelys").expect("Expected program to run successfully") -} - -// ============================================================================= -// Basic Closure Tests -// ============================================================================= - -#[test] -fn test_closure_basic_capture() { - // Closure captures immutable variable from enclosing scope - let result = run_ok( - r#" - fn make_adder(x) { - return fn(y) { return x + y } - } - let add5 = make_adder(5) - add5(10) - "#, - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_closure_capture_multiple_values() { - // Closure captures multiple variables - let result = run_ok( - 
r#" - fn make_calculator(a, b) { - return fn(op) { - if op == 1 { return a + b } - if op == 2 { return a - b } - if op == 3 { return a * b } - return a / b - } - } - let calc = make_calculator(10, 5) - calc(1) + calc(2) + calc(3) - "#, - ); - // (10+5) + (10-5) + (10*5) = 15 + 5 + 50 = 70 - assert_eq!(result.as_int(), Some(70)); -} - -#[test] -fn test_closure_returns_closure() { - // Function returns a closure that captures its parameter - let result = run_ok( - r#" - fn multiplier(factor) { - return fn(x) { return x * factor } - } - let double = multiplier(2) - let triple = multiplier(3) - double(5) + triple(5) - "#, - ); - // 10 + 15 = 25 - assert_eq!(result.as_int(), Some(25)); -} - -// ============================================================================= -// Mutable Upvalue Tests -// ============================================================================= - -#[test] -fn test_closure_mutable_counter() { - // Classic counter example with mutable upvalue - let result = run_ok( - r#" - fn make_counter() { - let mut count = 0 - return fn() { - count++ - return count - } - } - let counter = make_counter() - let a = counter() - let b = counter() - let c = counter() - a + b + c - "#, - ); - // 1 + 2 + 3 = 6 - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_closure_separate_counters() { - // Two counters are independent - let result = run_ok( - r#" - fn make_counter() { - let mut count = 0 - return fn() { - count++ - return count - } - } - let counter1 = make_counter() - let counter2 = make_counter() - counter1() - counter1() - counter1() - counter2() - counter1() * 10 + counter2() - "#, - ); - // counter1 returns 4, counter2 returns 2 => 40 + 2 = 42 - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_closure_counter_with_step() { - // Counter with configurable step - let result = run_ok( - r#" - fn make_stepper(step_size) { - let mut count = 0 - return fn() { - count += step_size - return count - } - } - let by5 = make_stepper(5) - 
by5() - by5() - by5() - "#, - ); - // 5, 10, 15 -> returns 15 - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_closure_accumulator() { - // Accumulator that adds to running total - let result = run_ok( - r#" - fn make_accumulator() { - let mut total = 0 - return fn(n) { - total += n - return total - } - } - let acc = make_accumulator() - acc(10) - acc(20) - acc(5) - "#, - ); - // 10 + 20 + 5 = 35 - assert_eq!(result.as_int(), Some(35)); -} - -// ============================================================================= -// Nested Closure Tests -// ============================================================================= - -#[test] -fn test_closure_nested_capture() { - // Nested closures capturing from multiple levels - let result = run_ok( - r#" - fn outer(x) { - fn middle(y) { - return fn(z) { - return x + y + z - } - } - return middle(10) - } - let nested = outer(100) - nested(1) - "#, - ); - // 100 + 10 + 1 = 111 - assert_eq!(result.as_int(), Some(111)); -} - -#[test] -fn test_closure_deeply_nested() { - // Three levels of nesting - let result = run_ok( - r#" - fn level1(a) { - return fn(b) { - return fn(c) { - return fn(d) { - return a * 1000 + b * 100 + c * 10 + d - } - } - } - } - let f1 = level1(1) - let f2 = f1(2) - let f3 = f2(3) - f3(4) - "#, - ); - // 1*1000 + 2*100 + 3*10 + 4 = 1234 - assert_eq!(result.as_int(), Some(1234)); -} - -// ============================================================================= -// Block-Scoped Closing Tests -// ============================================================================= - -#[test] -fn test_closure_block_scoped_capture() { - // Variable captured in block, closure escapes block - let result = run_ok( - r#" - fn block_test() { - let mut result = 0 - { - let mut x = 10 - let capture = fn() { return x } - x = 20 - result = capture() - } - return result - } - block_test() - "#, - ); - // x was 20 when capture() was called - assert_eq!(result.as_int(), Some(20)); -} - -#[test] -fn 
test_closure_captures_after_block_exit() { - // Closure still works after block containing captured var exits - let result = run_ok( - r#" - fn test() { - let mut f = null - { - let x = 42 - f = fn() { return x } - } - return f() - } - test() - "#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -// ============================================================================= -// Closure in Loop Tests -// ============================================================================= - -#[test] -fn test_closure_in_loop_capture_current() { - // Each iteration captures its own value - let result = run_ok( - r#" - fn loop_test() { - let mut sum = 0 - let mut i = 1 - while i <= 3 { - let val = i - let f = fn() { return val } - sum += f() - i++ - } - return sum - } - loop_test() - "#, - ); - // 1 + 2 + 3 = 6 - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_closure_captures_loop_variable_by_ref() { - // Capture loop variable directly (all see final value) - let result = run_ok( - r#" - fn test() { - let mut i = 0 - let f = fn() { return i } - while i < 5 { - i++ - } - return f() - } - test() - "#, - ); - // f sees final value of i = 5 - assert_eq!(result.as_int(), Some(5)); -} - -// ============================================================================= -// Higher-Order Function Tests with Closures -// ============================================================================= - -#[test] -fn test_closure_as_callback() { - // Closure passed as callback - let result = run_ok( - r#" - fn apply_twice(f, x) { - return f(f(x)) - } - let double = fn(x) { return x * 2 } - apply_twice(double, 3) - "#, - ); - // double(double(3)) = double(6) = 12 - assert_eq!(result.as_int(), Some(12)); -} - -#[test] -fn test_closure_compose() { - // Compose two functions - let result = run_ok( - r#" - fn compose(f, g) { - return fn(x) { return f(g(x)) } - } - let add1 = fn(x) { return x + 1 } - let mul2 = fn(x) { return x * 2 } - let add1_then_mul2 = compose(mul2, add1) - 
add1_then_mul2(5) - "#, - ); - // (5 + 1) * 2 = 12 - assert_eq!(result.as_int(), Some(12)); -} - -#[test] -fn test_closure_partial_application() { - // Partial application pattern - let result = run_ok( - r#" - fn add(a) { - return fn(b) { - return a + b - } - } - let add10 = add(10) - let add20 = add(20) - add10(5) + add20(5) - "#, - ); - // 15 + 25 = 40 - assert_eq!(result.as_int(), Some(40)); -} - -// ============================================================================= -// Edge Cases -// ============================================================================= - -#[test] -fn test_closure_captures_bool() { - // Capture boolean value - toggle returns true, false, true - let result = run_ok( - r#" - fn make_toggle() { - let mut state = false - return fn() { - state = not state - return state - } - } - let toggle = make_toggle() - let a = toggle() - let b = toggle() - let c = toggle() - // a=true(1), b=false(0), c=true(1) => sum = 2 - let sum = 0 - let mut result = sum - if a { result++ } - if b { result++ } - if c { result++ } - result - "#, - ); - // a=true, b=false, c=true => 1 + 0 + 1 = 2 - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_closure_captures_null() { - // Capture and modify null - let result = run_ok( - r#" - fn test() { - let mut val = null - let setter = fn(v) { val = v } - let getter = fn() { return val } - setter(42) - return getter() - } - test() - "#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_closure_immediate_invoke() { - // Immediately invoked closure - let result = run_ok( - r#" - let x = 10 - let result = (fn(y) { return x + y })(5) - result - "#, - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_closure_recursive_with_capture() { - // Recursive function that also captures - let result = run_ok( - r#" - fn make_factorial(base) { - fn fact(n) { - if n <= 1 { return base } - return n * fact(n - 1) - } - return fact - } - let fact1 = make_factorial(1) - fact1(5) - "#, - 
); - assert_eq!(result.as_int(), Some(120)); -} - -#[test] -fn test_closure_modifies_outer_mutable() { - // Closure modifies mutable variable in outer scope - let result = run_ok( - r#" - fn test() { - let mut x = 0 - let inc = fn() { x++ } - inc() - inc() - inc() - return x - } - test() - "#, - ); - assert_eq!(result.as_int(), Some(3)); -} - -#[test] -fn test_closure_with_float() { - // Capture float values - let result = run_ok( - r#" - fn make_scaler(factor) { - return fn(x) { return x * factor } - } - let half = make_scaler(0.5) - half(10.0) - "#, - ); - assert_eq!(result.as_float(), Some(5.0)); -} - -#[test] -fn test_closure_chain() { - // Chain of closures - let result = run_ok( - r#" - fn chain(initial) { - let mut value = initial - let add = fn(n) { value += n; return value } - let mul = fn(n) { value *= n; return value } - let get = fn() { return value } - add(5) - mul(2) - add(10) - return get() - } - chain(10) - "#, - ); - // (10 + 5) * 2 + 10 = 40 - assert_eq!(result.as_int(), Some(40)); -} diff --git a/aelys/tests/common/mod.rs b/aelys/tests/common/mod.rs deleted file mode 100644 index ed916c8..0000000 --- a/aelys/tests/common/mod.rs +++ /dev/null @@ -1,106 +0,0 @@ -//! Common test utilities for Aelys tests -#![allow(dead_code)] - -use aelys::{new_vm, run_with_vm_and_opt}; -use aelys_opt::OptimizationLevel; -use aelys_runtime::Value; - -/// Run Aelys source code and return the result value. -/// Panics on compilation or runtime errors. -/// Supports module imports (needs std.X). -pub fn run_aelys(source: &str) -> Value { - let mut vm = new_vm().expect("Failed to create VM"); - run_with_vm_and_opt(&mut vm, source, "", OptimizationLevel::Standard) - .expect("Aelys execution should succeed") -} - -/// Run Aelys source code and expect it to succeed. -/// Returns the Value result. 
-pub fn run_aelys_ok(source: &str) -> Value { - let mut vm = new_vm().expect("Failed to create VM"); - run_with_vm_and_opt(&mut vm, source, "", OptimizationLevel::Standard) - .expect("Expected success but got error") -} - -/// Run Aelys source code and expect it to fail. -/// Returns the error message. -pub fn run_aelys_err(source: &str) -> String { - let mut vm = new_vm().expect("Failed to create VM"); - match run_with_vm_and_opt(&mut vm, source, "", OptimizationLevel::Standard) { - Ok(v) => panic!("Expected error but got success: {:?}", v), - Err(e) => e.to_string(), - } -} - -/// Run Aelys source code and check if it returns the expected integer. -pub fn assert_aelys_int(source: &str, expected: i64) { - let result = run_aelys(source); - assert_eq!( - result.as_int(), - Some(expected), - "Expected int {} but got {:?}", - expected, - result - ); -} - -/// Run Aelys source code and check if it returns the expected boolean. -pub fn assert_aelys_bool(source: &str, expected: bool) { - let result = run_aelys(source); - assert_eq!( - result.as_bool(), - Some(expected), - "Expected bool {} but got {:?}", - expected, - result - ); -} - -/// Run Aelys source code and check if it returns null. -pub fn assert_aelys_null(source: &str) { - let result = run_aelys(source); - assert!(result.is_null(), "Expected null but got {:?}", result); -} - -/// Run Aelys source code and check if it returns the expected string. 
-pub fn assert_aelys_str(source: &str, expected: &str) { - use aelys::new_vm; - let mut vm = new_vm().expect("Failed to create VM"); - let result = aelys::run_with_vm_and_opt(&mut vm, source, "", OptimizationLevel::Standard) - .expect("Aelys execution should succeed"); - if let Some(ptr) = result.as_ptr() { - let heap = vm.heap(); - if let Some(obj) = heap.get(aelys_runtime::vm::GcRef::new(ptr)) - && let aelys_runtime::vm::ObjectKind::String(s) = &obj.kind - { - assert_eq!( - s.as_str(), - expected, - "Expected string '{}' but got '{}'", - expected, - s.as_str() - ); - return; - } - } - panic!("Expected string '{}' but got {:?}", expected, result); -} - -/// Run Aelys source code and check if it returns an error containing the given substring. -pub fn assert_aelys_error_contains(source: &str, expected_substring: &str) { - let err = run_aelys_err(source); - assert!( - err.contains(expected_substring), - "Expected error containing '{}' but got: {}", - expected_substring, - err - ); -} - -/// Run Aelys source code, allowing both success and error. -/// Returns Ok(Value) on success, Err(String) on error. 
-pub fn run_aelys_result(source: &str) -> Result { - let mut vm = new_vm().expect("Failed to create VM"); - run_with_vm_and_opt(&mut vm, source, "", OptimizationLevel::Standard) - .map_err(|e| e.to_string()) -} diff --git a/aelys/tests/compiler_call_tests.rs b/aelys/tests/compiler_call_tests.rs deleted file mode 100644 index caf41f4..0000000 --- a/aelys/tests/compiler_call_tests.rs +++ /dev/null @@ -1,24 +0,0 @@ -#[test] -fn arg_range_available_checks_bounds_and_usage() { - let mut pool = [false; 256]; - pool[3] = true; - - // This will need access to the private function, so we need to recreate the logic - // or make the function public for testing - let arg_range_available = |pool: &[bool; 256], start: usize, count: usize| -> bool { - if start + count > 256 { - return false; - } - - for slot in pool.iter().skip(start).take(count) { - if *slot { - return false; - } - } - true - }; - - assert!(arg_range_available(&pool, 0, 3)); - assert!(!arg_range_available(&pool, 2, 2)); - assert!(!arg_range_available(&pool, 254, 3)); -} diff --git a/aelys/tests/compiler_expr_tests.rs b/aelys/tests/compiler_expr_tests.rs deleted file mode 100644 index e8c0afd..0000000 --- a/aelys/tests/compiler_expr_tests.rs +++ /dev/null @@ -1,16 +0,0 @@ -#[test] -fn small_int_immediate_accepts_i16_range() { - let small_int_immediate = |n: i64| -> Option { - if (i16::MIN as i64) <= n && n <= (i16::MAX as i64) { - Some(n as i16) - } else { - None - } - }; - - assert_eq!(small_int_immediate(10), Some(10)); - assert_eq!(small_int_immediate(i16::MAX as i64), Some(i16::MAX)); - assert_eq!(small_int_immediate(i16::MIN as i64), Some(i16::MIN)); - assert_eq!(small_int_immediate(i16::MAX as i64 + 1), None); - assert_eq!(small_int_immediate(i16::MIN as i64 - 1), None); -} diff --git a/aelys/tests/compiler_functions_tests.rs b/aelys/tests/compiler_functions_tests.rs deleted file mode 100644 index 9ea01fe..0000000 --- a/aelys/tests/compiler_functions_tests.rs +++ /dev/null @@ -1,37 +0,0 @@ -use 
aelys_backend::compiler::Compiler; -use aelys_syntax::ast::{Expr, ExprKind, Function, Stmt, StmtKind}; -use aelys_syntax::{Source, Span}; - -#[test] -fn untyped_function_merges_call_site_slots_without_implicit_return() { - let span = Span::dummy(); - let source = Source::new("", ""); - let mut compiler = Compiler::new(None, source); - compiler.globals.insert("callee".to_string(), false); - - let call_expr = Expr::new( - ExprKind::Call { - callee: Box::new(Expr::new(ExprKind::Identifier("callee".to_string()), span)), - args: Vec::new(), - }, - span, - ); - - let func = Function { - name: "f".to_string(), - type_params: Vec::new(), - params: Vec::new(), - return_type: None, - body: vec![ - Stmt::new(StmtKind::Expression(call_expr), span), - Stmt::new(StmtKind::Return(None), span), - ], - decorators: Vec::new(), - is_pub: false, - span, - }; - - compiler.compile_function(&func).unwrap(); - - assert!(compiler.next_call_site_slot > 0); -} diff --git a/aelys/tests/compiler_tests.rs b/aelys/tests/compiler_tests.rs deleted file mode 100644 index b36987f..0000000 --- a/aelys/tests/compiler_tests.rs +++ /dev/null @@ -1,17 +0,0 @@ -use aelys_backend::Compiler; -use aelys_frontend::lexer::Lexer; -use aelys_frontend::parser::Parser; -use aelys_sema::TypeInference; -use aelys_syntax::Source; - -#[test] -fn compile_typed_program_to_bytecode() { - let src = Source::new("", "fn f() -> string { \"hi\" } f()"); - let tokens = Lexer::with_source(src.clone()).scan().unwrap(); - let ast = Parser::new(tokens, src.clone()).parse().unwrap(); - let typed = TypeInference::infer_program(ast, src.clone()).unwrap(); - - let (func, heap, _globals) = Compiler::new(None, src).compile_typed(&typed).unwrap(); - assert!(!func.bytecode.is_empty()); - assert!(heap.object_count() > 0); -} diff --git a/aelys/tests/compound_assign_tests.rs b/aelys/tests/compound_assign_tests.rs deleted file mode 100644 index bd2d7a6..0000000 --- a/aelys/tests/compound_assign_tests.rs +++ /dev/null @@ -1,270 +0,0 @@ -//! 
Tests for compound assignment (+=, -=, *=, /=, %=) and increment/decrement (++, --) - -mod common; -use common::*; - -// === Compound assignment on variables === - -#[test] -fn test_plus_eq() { - assert_aelys_int( - r#" -let mut x = 10 -x += 5 -x -"#, - 15, - ); -} - -#[test] -fn test_minus_eq() { - assert_aelys_int( - r#" -let mut x = 10 -x -= 3 -x -"#, - 7, - ); -} - -#[test] -fn test_star_eq() { - assert_aelys_int( - r#" -let mut x = 6 -x *= 7 -x -"#, - 42, - ); -} - -#[test] -fn test_slash_eq() { - assert_aelys_int( - r#" -let mut x = 20 -x /= 4 -x -"#, - 5, - ); -} - -#[test] -fn test_percent_eq() { - assert_aelys_int( - r#" -let mut x = 17 -x %= 5 -x -"#, - 2, - ); -} - -// === Increment / Decrement === - -#[test] -fn test_increment() { - assert_aelys_int( - r#" -let mut count = 0 -count++ -count++ -count++ -count -"#, - 3, - ); -} - -#[test] -fn test_decrement() { - assert_aelys_int( - r#" -let mut n = 10 -n-- -n-- -n -"#, - 8, - ); -} - -// === Compound assignment in loops === - -#[test] -fn test_plus_eq_in_for_loop() { - assert_aelys_int( - r#" -let mut sum = 0 -for i in 0..10 { - sum += i -} -sum -"#, - 45, - ); -} - -#[test] -fn test_star_eq_in_loop() { - assert_aelys_int( - r#" -let mut product = 1 -for i in 1..=5 { - product *= i -} -product -"#, - 120, - ); -} - -#[test] -fn test_increment_in_while_loop() { - assert_aelys_int( - r#" -let mut count = 0 -while count < 10 { - count++ -} -count -"#, - 10, - ); -} - -// === Compound assignment on array/vec index === - -#[test] -fn test_plus_eq_on_array_index() { - assert_aelys_int( - r#" -let arr = Array[10, 20, 30] -arr[1] += 5 -arr[1] -"#, - 25, - ); -} - -#[test] -fn test_minus_eq_on_array_index() { - assert_aelys_int( - r#" -let arr = Array[100, 200, 300] -arr[2] -= 50 -arr[2] -"#, - 250, - ); -} - -// === Compound assignment with floats === - -#[test] -fn test_plus_eq_float() { - let result = run_aelys( - r#" -let mut x = 1.5 -x += 2.5 -x -"#, - ); - assert_eq!(result.as_float(), Some(4.0)); -} - -// 
=== String concatenation with += === - -#[test] -fn test_plus_eq_string() { - assert_aelys_str( - r#" -let mut s = "hello" -s += " world" -s -"#, - "hello world", - ); -} - -// === Chained compound assignments === - -#[test] -fn test_all_compound_ops_chained() { - assert_aelys_int( - r#" -let mut x = 10 -x += 5 // 15 -x -= 3 // 12 -x *= 2 // 24 -x /= 4 // 6 -x %= 5 // 1 -x -"#, - 1, - ); -} - -// === Compound assignment with expressions === - -#[test] -fn test_plus_eq_with_expression() { - assert_aelys_int( - r#" -let mut x = 10 -let y = 3 -x += y * 2 -x -"#, - 16, - ); -} - -// === Double negation still works === - -#[test] -fn test_double_negation_preserved() { - assert_aelys_int("--42", 42); -} - -// === Increment in function === - -#[test] -fn test_increment_in_function() { - assert_aelys_int( - r#" -fn count_up(n) { - let mut count = 0 - for i in 0..n { - count++ - } - return count -} -count_up(100) -"#, - 100, - ); -} - -// === Compound assign with mut param === - -#[test] -fn test_compound_assign_mut_param() { - assert_aelys_int( - r#" -fn accumulate(mut acc: int, n: int) -> int { - for i in 0..n { - acc += i - } - return acc -} -accumulate(10, 5) -"#, - 20, - ); -} diff --git a/aelys/tests/dx_improvement_tests.rs b/aelys/tests/dx_improvement_tests.rs deleted file mode 100644 index 970a703..0000000 --- a/aelys/tests/dx_improvement_tests.rs +++ /dev/null @@ -1,364 +0,0 @@ -mod common; - -use common::{assert_aelys_bool, assert_aelys_int, assert_aelys_str, run_aelys}; - -#[test] -fn test_print_no_newline() { - let result = run_aelys(r#"print("hello"); 42"#); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_println_works() { - let result = run_aelys(r#"println("hello"); 42"#); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_for_each_vec_int() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3] - let mut sum = 0 - for item in v { - sum += item - } - sum - "#, - 6, - ); -} - -#[test] -fn test_for_each_vec_string() { - 
assert_aelys_int( - r#" - let v = Vec[] - v.push("a") - v.push("b") - v.push("c") - let mut count = 0 - for item in v { - count++ - } - count - "#, - 3, - ); -} - -#[test] -fn test_for_each_vec_float() { - let result = run_aelys( - r#" - let v = Vec[1.0, 2.0, 3.0] - let mut sum = 0.0 - for x in v { - sum += x - } - sum - "#, - ); - assert_eq!(result.as_float(), Some(6.0)); -} - -#[test] -fn test_for_each_vec_in_function() { - assert_aelys_int( - r#" - fn sum_vec(v) { - let mut total = 0 - for item in v { - total += item - } - return total - } - let nums = Vec[10, 20, 30] - sum_vec(nums) - "#, - 60, - ); -} - -#[test] -fn test_for_each_vec_empty() { - // empty vec: loop body should not execute - assert_aelys_int( - r#" - let v = Vec[] - let mut count = 0 - for item in v { - count++ - } - count - "#, - 0, - ); -} - -#[test] -fn test_for_each_vec_break() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3, 4, 5] - let mut sum = 0 - for item in v { - if item == 3 { break } - sum += item - } - sum - "#, - 3, // 1 + 2 - ); -} - -#[test] -fn test_for_each_vec_continue() { - assert_aelys_int( - r#" - let v = Vec[1, 2, 3, 4, 5] - let mut sum = 0 - for item in v { - if item == 3 { continue } - sum += item - } - sum - "#, - 12, // 1 + 2 + 4 + 5 - ); -} - -#[test] -fn test_for_each_array_int() { - assert_aelys_int( - r#" - let arr = [10, 20, 30] - let mut sum = 0 - for item in arr { - sum += item - } - sum - "#, - 60, - ); -} - -#[test] -fn test_for_each_array_empty() { - assert_aelys_int( - r#" - let arr = [] - let mut count = 0 - for item in arr { - count++ - } - count - "#, - 0, - ); -} - -#[test] -fn test_for_each_array_bool() { - // count true values - assert_aelys_int( - r#" - let arr = [true, false, true, true, false] - let mut count = 0 - for item in arr { - if item { count++ } - } - count - "#, - 3, - ); -} - -#[test] -fn test_for_each_array_break() { - assert_aelys_int( - r#" - let arr = [1, 2, 3, 4, 5] - let mut sum = 0 - for item in arr { - if item > 3 { break } - 
sum += item - } - sum - "#, - 6, // 1 + 2 + 3 - ); -} - -#[test] -fn test_for_each_array_float() { - let result = run_aelys( - r#" - let arr = [1.5, 2.5, 3.0] - let mut sum = 0.0 - for x in arr { - sum += x - } - sum - "#, - ); - assert_eq!(result.as_float(), Some(7.0)); -} - -#[test] -fn test_for_each_nested_vec() { - assert_aelys_int( - r#" - let rows = Vec[Vec[1, 2], Vec[3, 4], Vec[5, 6]] - let mut sum = 0 - for row in rows { - for item in row { - sum += item - } - } - sum - "#, - 21, - ); -} - -#[test] -fn test_string_method_len() { - assert_aelys_int(r#""hello".len()"#, 5); -} - -#[test] -fn test_string_method_trim() { - assert_aelys_str(r#"" hi ".trim()"#, "hi"); -} - -#[test] -fn test_string_method_contains() { - assert_aelys_bool(r#""hello world".contains("world")"#, true); - assert_aelys_bool(r#""hello".contains("xyz")"#, false); -} - -#[test] -fn test_string_method_to_upper() { - assert_aelys_str(r#""hello".to_upper()"#, "HELLO"); -} - -#[test] -fn test_string_method_to_lower() { - assert_aelys_str(r#""HELLO".to_lower()"#, "hello"); -} - -#[test] -fn test_string_method_starts_with() { - assert_aelys_bool(r#""hello world".starts_with("hello")"#, true); - assert_aelys_bool(r#""hello world".starts_with("world")"#, false); -} - -#[test] -fn test_string_method_ends_with() { - assert_aelys_bool(r#""hello world".ends_with("world")"#, true); - assert_aelys_bool(r#""hello world".ends_with("hello")"#, false); -} - -#[test] -fn test_string_method_replace() { - assert_aelys_str(r#""aaa".replace("a", "b")"#, "bbb"); -} - -#[test] -fn test_string_method_is_empty() { - assert_aelys_bool(r#""".is_empty()"#, true); - assert_aelys_bool(r#""hello".is_empty()"#, false); -} - -#[test] -fn test_string_method_repeat() { - assert_aelys_str(r#""ab".repeat(3)"#, "ababab"); -} - -#[test] -fn test_string_method_capitalize() { - assert_aelys_str(r#""hello".capitalize()"#, "Hello"); -} - -#[test] -fn test_string_method_reverse() { - assert_aelys_str(r#""abc".reverse()"#, "cba"); -} - 
-#[test] -fn test_string_method_trim_start() { - assert_aelys_str(r#"" hi ".trim_start()"#, "hi "); -} - -#[test] -fn test_string_method_trim_end() { - assert_aelys_str(r#"" hi ".trim_end()"#, " hi"); -} - -#[test] -fn test_string_method_find() { - assert_aelys_int(r#""hello world".find("world")"#, 6); - assert_aelys_int(r#""hello".find("xyz")"#, -1); -} - -#[test] -fn test_string_method_count() { - assert_aelys_int(r#""banana".count("a")"#, 3); -} - -#[test] -fn test_string_method_char_len() { - assert_aelys_int(r#""hello".char_len()"#, 5); -} - -#[test] -fn test_string_method_is_numeric() { - assert_aelys_bool(r#""123".is_numeric()"#, true); - assert_aelys_bool(r#""12a".is_numeric()"#, false); -} - -#[test] -fn test_string_method_is_alphabetic() { - assert_aelys_bool(r#""abc".is_alphabetic()"#, true); - assert_aelys_bool(r#""a1c".is_alphabetic()"#, false); -} - -#[test] -fn test_string_method_on_variable() { - assert_aelys_str( - r#" - let s = " hello " - s.trim() - "#, - "hello", - ); -} - -#[test] -fn test_string_method_chaining() { - // Chain: trim then to_upper - assert_aelys_str( - r#" - let s = " hello " - let trimmed = s.trim() - trimmed.to_upper() - "#, - "HELLO", - ); -} - -#[test] -fn test_to_string_int() { - assert_aelys_str(r#"let x = 42; x.to_string()"#, "42"); -} - -#[test] -fn test_to_string_bool() { - assert_aelys_str(r#"let x = true; x.to_string()"#, "true"); -} - -#[test] -fn test_to_string_null() { - assert_aelys_str("let x = null; x.to_string()", "null"); -} diff --git a/aelys/tests/edge_cases_tests.rs b/aelys/tests/edge_cases_tests.rs deleted file mode 100644 index 85725fb..0000000 --- a/aelys/tests/edge_cases_tests.rs +++ /dev/null @@ -1,795 +0,0 @@ -mod common; -use common::*; - -// Unicode edge cases - -#[test] -fn unicode_surrogate_pairs() { - let code = r#" -let s = "𝕳𝖊𝖑𝖑𝖔" -s.char_len() -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn unicode_rtl_text_handling() { - let code = r#" -let arabic = "مرحبا بك" -let len = arabic.char_len() 
-if len > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn unicode_combining_characters() { - let code = r#" -let s = "é" -s.len() -"#; - // Should be 2 bytes for composed form - let result = run_aelys(code); - assert!(result.as_int().unwrap() > 0); -} - -#[test] -fn unicode_zero_width_characters() { - let code = "let s = \"a\u{200B}b\"\ns.len()\n"; - let result = run_aelys(code); - assert!(result.as_int().unwrap() > 2); -} - -#[test] -fn unicode_emoji_sequences() { - let code = r#" -let emoji = "👨‍👩‍👧‍👦" -let byte_len = emoji.len() -let char_len = emoji.char_len() -if byte_len > char_len { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -// String literal max length tests - -#[test] -fn very_long_string_literal() { - let long_str = "x".repeat(10000); - let code = format!( - r#" -let s = "{}" -s.len() -"#, - long_str - ); - assert_aelys_int(&code, 10000); -} - -#[test] -fn extremely_long_string_concat() { - let code = r#" -let mut s = "" -let mut i = 0 -while i < 1000 { - s += "x" - i++ -} -s.len() -"#; - assert_aelys_int(code, 1000); -} - -// Recursion at MAX_FRAMES - -#[test] -fn recursion_near_max_frames() { - let code = r#" -fn recurse(n) { - if n <= 0 { - return 1 - } - return recurse(n - 1) -} -recurse(500) -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn mutual_recursion_deep() { - let code = r#" -fn even(n) { - if n == 0 { return true } - if n == 1 { return false } - return odd(n - 1) -} -fn odd(n) { - if n == 0 { return false } - if n == 1 { return true } - return even(n - 1) -} -if even(200) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -// GC during allocation - -#[test] -fn gc_stress_many_allocations() { - let code = r#" -let mut i = 0 -while i < 10000 { - let s = "test string number " + "more text" - i++ -} -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn gc_with_circular_references() { - // Aelys doesn't have mutable data structures that can create cycles - // But we can stress GC with many allocations 
- let code = r#" -fn make_strings(n) { - if n <= 0 { return 0 } - let s = "string " + "concat" - return make_strings(n - 1) + 1 -} -make_strings(1000) -"#; - assert_aelys_int(code, 1000); -} - -// Type system edge cases - -#[test] -fn deeply_nested_function_types() { - let code = r#" -fn f1() { return fn() { return fn() { return fn() { return 42 } } } } -let result = f1()()()() -result -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn closure_capturing_many_variables() { - let code = r#" -fn make_closure() { - let a = 1 - let b = 2 - let c = 3 - let d = 4 - let e = 5 - let f = 6 - let g = 7 - let h = 8 - return fn() { return a + b + c + d + e + f + g + h } -} -let closure = make_closure() -closure() -"#; - assert_aelys_int(code, 36); -} - -// Integer boundary values - -#[test] -fn int_max_value() { - let code = r#" -let max = 140737488355327 -if max > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn int_overflow_handling() { - let code = r#" -let large = 140737488355327 -let result = pow(large, 2) -if result > 0.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -// Float special values - -#[test] -fn float_infinity_operations() { - let code = r#" -let inf = INF -let result = inf + 1.0 -if is_inf(result) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn float_nan_propagation() { - let code = r#" -let nan = sqrt(-1.0) -let result = nan + 5.0 -if is_nan(result) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn division_by_very_small() { - let code = r#" -let result = 1.0 / 0.0000000001 -if result > 1000000000.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -// Empty collections and edge cases - -#[test] -fn empty_string_operations() { - let code = r#" -let s = "" -let rev = s.reverse() -let upper = s.to_upper() -if s.is_empty() { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_split_on_empty() { - let code = r#" -let parts = "".split(",") -42 -"#; - 
assert_aelys_int(code, 42); -} - -// Boundary conditions for loops - -#[test] -fn loop_zero_iterations() { - let code = r#" -let mut sum = 0 -let mut i = 0 -while i < 0 { - sum += i - i++ -} -sum -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn loop_one_iteration() { - let code = r#" -let mut sum = 0 -let mut i = 0 -while i < 1 { - sum += i - i++ -} -sum -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn nested_loops_deep() { - let code = r#" -let mut count = 0 -let mut i = 0 -while i < 10 { - let mut j = 0 - while j < 10 { - let mut k = 0 - while k < 10 { - count++ - k++ - } - j++ - } - i++ -} -count -"#; - assert_aelys_int(code, 1000); -} - -// Variable shadowing edge cases - -#[test] -fn extreme_variable_shadowing() { - let code = r#" -let x = 1 -{ - let x = 2 - { - let x = 3 - { - let x = 4 - { - let x = 5 - } - } - } -} -x -"#; - assert_aelys_int(code, 1); -} - -// Function parameter edge cases - -#[test] -fn function_with_zero_params() { - let code = r#" -fn f() { return 42 } -f() -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn function_with_many_params() { - let code = r#" -fn add10(a, b, c, d, e, f, g, h, i, j) { - return a + b + c + d + e + f + g + h + i + j -} -add10(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) -"#; - assert_aelys_int(code, 55); -} - -// Whitespace and formatting edge cases - -#[test] -fn code_with_excessive_whitespace() { - let code = r#" -let x = 42 - - -x -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn code_with_tabs() { - let code = "let\tx\t=\t42\nx"; - assert_aelys_int(code, 42); -} - -// Math edge cases - -#[test] -fn sqrt_zero() { - let code = r#" -let r = sqrt(0.0) -if r == 0.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn log_one() { - let code = r#" -let r = log(1.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn pow_zero_exponent() { - let code = r#" -let r = pow(123.456, 0.0) -if r > 0.99 and r < 1.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} 
- -#[test] -fn trig_large_angles() { - let code = r#" -let r = sin(1000000.0) -if r >= -1.0 and r <= 1.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn variable_in_function_accessed_in_for_loop() { - let code = r#" -fn foo() { - let x = 42 - let mut sum = 0 - for i in 0..5 { - sum += x - } - return sum -} -foo() -"#; - assert_aelys_int(code, 210); -} - -#[test] -fn multiple_variables_in_function_accessed_in_for_loop() { - let code = r#" -fn foo() { - let a = 10 - let b = 20 - let c = 30 - let mut sum = 0 - for i in 0..3 { - sum += a + b + c - } - return sum -} -foo() -"#; - assert_aelys_int(code, 180); -} - -#[test] -fn variable_in_function_accessed_in_while_loop() { - let code = r#" -fn foo() { - let x = 42 - let mut sum = 0 - let mut i = 0 - while i < 5 { - sum += x - i++ - } - return sum -} -foo() -"#; - assert_aelys_int(code, 210); -} - -#[test] -fn variable_in_function_nested_for_loops() { - let code = r#" -fn foo() { - let x = 1 - let mut count = 0 - for i in 0..5 { - for j in 0..5 { - count += x - } - } - return count -} -foo() -"#; - assert_aelys_int(code, 25); -} - -#[test] -fn string_variable_in_function_for_loop() { - let code = r#" -fn foo() { - let prefix = "hello" - let mut count = 0 - for i in 0..3 { - let len = prefix.len() - if len > 0 { - count++ - } - } - return count -} -foo() -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn variable_defined_before_function_used_in_loop_via_closure() { - let code = r#" -fn make_adder(x) { - return fn(y) { return x + y } -} -let adder = make_adder(10) -let mut sum = 0 -for i in 0..5 { - sum += adder(i) -} -sum -"#; - assert_aelys_int(code, 60); -} - -#[test] -fn for_loop_register_collision_stress() { - let code = r#" -fn test() { - let a = 1 - let b = 2 - let c = 3 - let d = 4 - let e = 5 - let mut total = 0 - for i in 0..10 { - total += a + b + c + d + e - } - return total -} -test() -"#; - assert_aelys_int(code, 150); -} - -// Parameter accessed in for loop -#[test] -fn 
parameter_accessed_in_for_loop() { - let code = r#" -fn foo(x) { - let mut sum = 0 - for i in 0..5 { - sum += x - } - return sum -} -foo(42) -"#; - assert_aelys_int(code, 210); -} - -// Closure captures variable used in for loop -#[test] -fn closure_captures_and_for_loop() { - let code = r#" -fn make_value() { - let x = 42 - fn inner() { - return x - } - let mut sum = 0 - for i in 0..5 { - sum += inner() - } - return sum -} -make_value() -"#; - assert_aelys_int(code, 210); -} - -// Global variable accessed in function's for loop -#[test] -fn global_var_in_function_for_loop() { - let code = r#" -let x = 42 -fn foo() { - let mut sum = 0 - for i in 0..5 { - sum += x - } - return sum -} -foo() -"#; - assert_aelys_int(code, 210); -} - -// Variable in nested scope before for loop -#[test] -fn nested_scope_before_for_loop() { - let code = r#" -fn foo() { - let x = 42 - { - let tmp = x + 1 - } - let mut sum = 0 - for i in 0..5 { - sum += x - } - return sum -} -foo() -"#; - assert_aelys_int(code, 210); -} - -// Variable shadowed in nested scope then used in for loop -#[test] -fn variable_used_after_inner_scope_in_for_loop() { - let code = r#" -fn foo() { - let x = 10 - let y = 20 - { - let x = 100 - let z = x + y - } - let mut sum = 0 - for i in 0..3 { - sum += x + y - } - return sum -} -foo() -"#; - assert_aelys_int(code, 90); -} - -// Function call inside for loop -#[test] -fn function_call_inside_for_loop() { - let code = r#" -fn double(n) { - return n * 2 -} -fn test() { - let mut sum = 0 - for i in 0..5 { - sum += double(i) - } - return sum -} -test() -"#; - assert_aelys_int(code, 20); -} - -// Multiple for loops in sequence -#[test] -fn multiple_for_loops_same_function() { - let code = r#" -fn test() { - let x = 10 - let mut sum = 0 - for i in 0..5 { - sum += x - } - for j in 0..3 { - sum += x - } - return sum -} -test() -"#; - assert_aelys_int(code, 80); -} - -// Mutable variable modified both inside and outside for loop -#[test] -fn 
mutable_var_modified_in_and_out_of_for_loop() { - let code = r#" -fn test() { - let mut x = 0 - x = 5 - for i in 0..3 { - x++ - } - return x -} -test() -"#; - assert_aelys_int(code, 8); -} - -// String concat in for loop (GC stress with heap objects) -#[test] -fn string_concat_in_for_loop_function() { - let code = r#" -fn test() { - let mut s = "" - for i in 0..5 { - s += "x" - } - return s.len() -} -test() -"#; - assert_aelys_int(code, 5); -} - -// For loop with variable-based range -#[test] -fn for_loop_with_variable_range() { - let code = r#" -fn test() { - let start = 0 - let end_val = 5 - let x = 10 - let mut sum = 0 - for i in start..end_val { - sum += x + i - } - return sum -} -test() -"#; - assert_aelys_int(code, 60); -} - -// Nested function with for loop accessing outer vars via closure -#[test] -fn nested_fn_with_for_loop_closure() { - let code = r#" -fn outer() { - let x = 10 - fn inner() { - let mut sum = 0 - for i in 0..5 { - sum += x - } - return sum - } - return inner() -} -outer() -"#; - assert_aelys_int(code, 50); -} - -// String interpolation with for-loop iterator, the original bug report by Selofaney -#[test] -fn string_interpolation_for_loop_variable() { - let code = r#" -fn test() { - let mut result = "" - for i in 0..3 { - result = "last={i}" - } - return result -} -test() -"#; - assert_aelys_str(code, "last=2"); -} - -#[test] -fn string_interpolation_variable_in_loop() { - let code = r#" -fn test() { - let x = 42 - let mut s = "" - for i in 0..1 { - s = "x={x}" - } - return s -} -test() -"#; - assert_aelys_str(code, "x=42"); -} - -#[test] -fn string_interpolation_multiple_vars() { - let code = r#" -let a = 10 -let b = 20 -"{a}+{b}" -"#; - assert_aelys_str(code, "10+20"); -} - -// Lambda inside for loop capturing loop variable -#[test] -fn lambda_inside_for_loop() { - let code = r#" -fn test() { - let mut sum = 0 - for i in 0..5 { - let f = fn() { return i } - sum += f() - } - return sum -} -test() -"#; - assert_aelys_int(code, 10); -} diff 
--git a/aelys/tests/enum_parse_tests.rs b/aelys/tests/enum_parse_tests.rs new file mode 100644 index 0000000..2faddd3 --- /dev/null +++ b/aelys/tests/enum_parse_tests.rs @@ -0,0 +1,1558 @@ +use aelys::api::compile_to_typed_ast; +use aelys_air::AirType; +use aelys_air::lower::lower; +use aelys_air::print::print_program; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn lower_source(code: &str) -> aelys_air::AirProgram { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().unwrap(); + let ast = Parser::new(tokens, src.clone()).parse().unwrap(); + let typed = TypeInference::infer_program(ast, src).unwrap(); + lower(&typed) +} + +fn lower_and_monomorphize(code: &str) -> aelys_air::AirProgram { + let air = lower_source(code); + aelys_air::mono::monomorphize(air).unwrap() +} + +#[test] +fn parse_basic_enum_declaration() { + let src = r#" +enum Color { + Red, + Green, + Blue, +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "basic enum should parse: {:?}", + result.err() + ); +} + +#[test] +fn parse_enum_variant_construction() { + let src = r#" +enum Color { + Red, + Green, + Blue, +} + +let c = Color::Red +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "enum construction should type-check: {:?}", + result.err() + ); +} + +#[test] +fn enum_variant_type_is_enum() { + let src = r#" +enum Color { + Red, + Green, + Blue, +} + +let a = Color::Red +let b = Color::Green +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "multiple variant constructions should type-check: {:?}", + result.err() + ); +} + +#[test] +fn enum_variant_type_error_unknown_variant() { + let src = r#" +enum Color { Red, Green, Blue } +let c = Color::Yellow +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "unknown variant should fail type check"); +} + +#[test] +fn 
enum_variant_type_error_unknown_enum() { + let src = r#" +let c = Bogus::Foo +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "unknown enum should fail type check"); +} + +#[test] +fn enum_variants_unify_same_enum() { + let src = r#" +enum Direction { Up, Down, Left, Right } + +fn choose(flag: bool) -> Direction { + if flag { + return Direction::Up + } + return Direction::Down +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "same-enum variants should unify: {:?}", + result.err() + ); +} + +#[test] +fn enum_mismatch_different_enums() { + let src = r#" +enum Color { Red, Green } +enum Shape { Circle, Square } + +fn bad() -> Color { + return Shape::Circle +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "different enum types should not unify with return type" + ); +} + +#[test] +fn enum_as_function_param() { + let src = r#" +enum Color { Red, Green, Blue } + +fn paint(c: Color) -> i64 { + return 1 +} + +let x = paint(Color::Red) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "enum as function param should type-check: {:?}", + result.err() + ); +} + +// ============ Data Variant Tests ============ + +#[test] +fn data_variant_parse_and_typecheck() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} +let msg = Message::Write("hello") +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "data variant should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_unit_still_works() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} +let msg = Message::Quit +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "unit variant in data enum should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_multiple_fields() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} +let msg = Message::Move(10, 
20) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "multi-field data variant should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_wrong_arg_count() { + let src = r#" +enum Message { Quit, Move(i64, i64) } +let msg = Message::Move(1) +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "wrong arg count should fail"); +} + +#[test] +fn data_variant_too_many_args() { + let src = r#" +enum Message { Quit, Move(i64, i64) } +let msg = Message::Move(1, 2, 3) +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "too many args should fail"); +} + +#[test] +fn data_variant_wrong_arg_type() { + let src = r#" +enum Message { Write(string) } +let msg = Message::Write(42) +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "wrong arg type should fail"); +} + +#[test] +fn unit_variant_with_args_fails() { + let src = r#" +enum Color { Red, Green } +let c = Color::Red(42) +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_err(), "unit variant with args should fail"); +} + +#[test] +fn data_variant_as_function_param() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} + +fn handle(m: Message) -> i64 { + return 0 +} + +let result = handle(Message::Write("test")) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "data variant as function param should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_return_type_unifies() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} + +fn make_msg(flag: bool) -> Message { + if flag { + return Message::Quit + } + return Message::Write("hello") +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "mixed variants should unify return type: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_air_lowering() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), 
+} +fn use_write() -> Message { + return Message::Write("hello") +} +"#; + let air = lower_source(src); + // Verify the enum def exists in the AIR program + let enum_def = air.enums.iter().find(|e| e.name == "Message"); + assert!(enum_def.is_some(), "Message enum should exist in AIR"); + let def = enum_def.unwrap(); + // Verify the data variant has payload types + let write_variant = def.variants.iter().find(|v| v.name == "Write"); + assert!(write_variant.is_some(), "Write variant should exist"); + assert_eq!( + write_variant.unwrap().payload.len(), + 1, + "Write variant should have 1 payload field" + ); +} + +#[test] +fn data_variant_unit_in_data_enum_air() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} +let msg = Message::Quit +"#; + let air = lower_source(src); + let enum_def = air.enums.iter().find(|e| e.name == "Message"); + assert!(enum_def.is_some(), "Message enum should exist in AIR"); + let def = enum_def.unwrap(); + let quit_variant = def.variants.iter().find(|v| v.name == "Quit"); + assert!(quit_variant.is_some(), "Quit variant should exist"); + assert!( + quit_variant.unwrap().payload.is_empty(), + "Quit variant should have no payload" + ); +} + +#[test] +fn data_variant_multi_field_air() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} +let msg = Message::Move(10, 20) +"#; + let air = lower_source(src); + let enum_def = air.enums.iter().find(|e| e.name == "Message"); + assert!(enum_def.is_some(), "Message enum should exist in AIR"); + let def = enum_def.unwrap(); + let move_variant = def.variants.iter().find(|v| v.name == "Move"); + assert!(move_variant.is_some(), "Move variant should exist"); + assert_eq!( + move_variant.unwrap().payload.len(), + 2, + "Move variant should have 2 payload fields" + ); +} + +#[test] +fn simple_enum_still_works_after_data_variant_changes() { + let src = r#" +enum Color { Red, Green, Blue } +let c = Color::Red +"#; + let air = lower_source(src); + let enum_def 
= air.enums.iter().find(|e| e.name == "Color"); + assert!(enum_def.is_some(), "Color enum should exist in AIR"); + let def = enum_def.unwrap(); + // All variants should have empty payload (simple enum) + for v in &def.variants { + assert!( + v.payload.is_empty(), + "simple enum variant {} should have no payload", + v.name + ); + } +} + +#[test] +fn data_variant_bool_field() { + let src = r#" +enum Event { + Click(i64, i64), + Toggle(bool), +} +let e = Event::Toggle(true) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "bool field data variant should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_with_mixed_alignment_fields() { + let src = r#" +enum Message { + Quit, + Mixed(i64, bool, i64), +} +let msg = Message::Mixed(1, true, 3) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "data variant with mixed alignment should type-check: {:?}", + result.err() + ); +} + +#[test] +fn data_variant_mixed_types() { + let src = r#" +enum Data { + IntVal(i64), + FloatVal(f64), + StrVal(string), + BoolVal(bool), +} +let d1 = Data::IntVal(42) +let d2 = Data::FloatVal(3.14) +let d3 = Data::StrVal("hello") +let d4 = Data::BoolVal(false) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "mixed-type data variants should type-check: {:?}", + result.err() + ); +} + +// ============ Match Expression Tests ============ + +#[test] +fn match_simple_enum_exhaustive() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + Color::Green => 2, + Color::Blue => 3, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "exhaustive match on simple enum should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_with_wildcard() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + _ => 0, + } +} +"#; + let result = 
compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match with wildcard should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_wildcard_must_be_last() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + _ => 0, + Color::Red => 1, + } +} +"#; + let result = compile_to_typed_ast(src); + let errors = result.expect_err("wildcard before specific arms should fail"); + let rendered = format!("{:?}", errors); + assert!( + rendered.contains("wildcard pattern must be the last match arm"), + "unexpected diagnostics: {rendered}" + ); +} + +#[test] +fn match_non_exhaustive_error() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + Color::Green => 2, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "non-exhaustive match should fail type check" + ); +} + +#[test] +fn match_duplicate_variant_error() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + Color::Red => 2, + Color::Green => 3, + Color::Blue => 4, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "duplicate variant pattern should fail type check" + ); +} + +#[test] +fn match_wrong_enum_in_pattern() { + let src = r#" +enum Color { Red, Green, Blue } +enum Shape { Circle, Square } + +fn name(c: Color) -> i64 { + return match c { + Shape::Circle => 1, + _ => 0, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "wrong enum in pattern should fail type check" + ); +} + +#[test] +fn match_unknown_variant_error() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Yellow => 1, + _ => 0, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "unknown variant in pattern should fail type check" + ); +} + +#[test] +fn 
match_data_variant_with_bindings() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} + +fn handle(m: Message) -> i64 { + return match m { + Message::Quit => 0, + Message::Move(x, y) => x, + Message::Write(text) => 1, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match with data variant bindings should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_data_variant_wrong_binding_count() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), +} + +fn handle(m: Message) -> i64 { + return match m { + Message::Quit => 0, + Message::Move(x) => x, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "wrong binding count should fail type check" + ); +} + +#[test] +fn match_as_expression_in_let() { + let src = r#" +enum Color { Red, Green, Blue } + +fn test(c: Color) -> i64 { + let x = match c { + Color::Red => 10, + Color::Green => 20, + Color::Blue => 30, + } + return x +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match as expression in let should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_arms_type_mismatch() { + let src = r#" +enum Color { Red, Green, Blue } + +fn test(c: Color) -> i64 { + return match c { + Color::Red => 1, + Color::Green => "hello", + Color::Blue => 3, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "match arms with different types should fail" + ); +} + +#[test] +fn match_simple_enum_air_lowering() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + Color::Green => 2, + Color::Blue => 3, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + // The AIR should contain a switch terminator + assert!( + air_text.contains("switch"), + "match should lower to switch terminator, got:\n{}", + air_text + ); + // The AIR should contain enum_tag 
extraction + assert!( + air_text.contains("enum_tag"), + "match should extract enum tag, got:\n{}", + air_text + ); +} + +#[test] +fn match_data_variant_air_lowering() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), + Write(string), +} + +fn handle(m: Message) -> i64 { + return match m { + Message::Quit => 0, + Message::Move(x, y) => x, + Message::Write(text) => 1, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + // Should contain switch, enum_tag, and enum_payload + assert!( + air_text.contains("switch"), + "match should lower to switch, got:\n{}", + air_text + ); + assert!( + air_text.contains("enum_tag"), + "match should extract tag, got:\n{}", + air_text + ); + assert!( + air_text.contains("enum_payload"), + "data match should extract payload, got:\n{}", + air_text + ); +} + +#[test] +fn match_with_wildcard_air_lowering() { + let src = r#" +enum Color { Red, Green, Blue } + +fn name(c: Color) -> i64 { + return match c { + Color::Red => 1, + _ => 0, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + // Should contain switch with default block + assert!( + air_text.contains("switch"), + "match with wildcard should lower to switch, got:\n{}", + air_text + ); + assert!( + air_text.contains("default"), + "match with wildcard should have default block, got:\n{}", + air_text + ); +} + +#[test] +fn match_all_unit_variants_exhaustive_no_default() { + let src = r#" +enum Dir { Up, Down } + +fn check(d: Dir) -> i64 { + return match d { + Dir::Up => 1, + Dir::Down => 2, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + // Should contain switch and unreachable (since it is exhaustive without wildcard) + assert!( + air_text.contains("switch"), + "exhaustive match should use switch, got:\n{}", + air_text + ); + assert!( + air_text.contains("unreachable"), + "exhaustive match without wildcard should have unreachable default, got:\n{}", + air_text + ); +} + 
+#[test] +fn match_binding_types_are_correct() { + let src = r#" +enum Message { + Quit, + Move(i64, i64), +} + +fn get_x(m: Message) -> i64 { + return match m { + Message::Quit => 0, + Message::Move(x, y) => x, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "binding types should propagate correctly: {:?}", + result.err() + ); +} + +#[test] +fn match_nested_in_if() { + let src = r#" +enum Color { Red, Green, Blue } + +fn test(c: Color, flag: bool) -> i64 { + if flag { + return match c { + Color::Red => 1, + Color::Green => 2, + Color::Blue => 3, + } + } + return 0 +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match nested in if should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_in_function_return() { + let src = r#" +enum Color { Red, Green, Blue } + +fn to_int(c: Color) -> i64 { + return match c { + Color::Red => 0, + Color::Green => 1, + Color::Blue => 2, + } +} + +let r = to_int(Color::Green) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match in function return should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_with_semicolons_between_arms() { + // The parser should accept both commas and semicolons between arms + let src = r#" +enum Color { Red, Green, Blue } + +fn test(c: Color) -> i64 { + return match c { + Color::Red => 1 + Color::Green => 2 + Color::Blue => 3 + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match with auto-semicolons should parse: {:?}", + result.err() + ); +} + +#[test] +fn match_single_arm_wildcard() { + let src = r#" +enum Color { Red, Green, Blue } + +fn test(c: Color) -> i64 { + return match c { + _ => 42, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "match with only wildcard should type-check: {:?}", + result.err() + ); +} + +#[test] +fn match_data_variant_uses_binding_in_body() { + // Verify that bindings are 
actually usable in the arm body + let src = r#" +enum Wrapper { + Val(i64), + None, +} + +fn extract(w: Wrapper) -> i64 { + return match w { + Wrapper::Val(x) => x, + Wrapper::None => 0, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "binding used in arm body should type-check: {:?}", + result.err() + ); + // Also verify AIR lowering works + let air = lower_source(src); + let air_text = print_program(&air); + assert!( + air_text.contains("enum_payload"), + "should extract payload for binding, got:\n{}", + air_text + ); +} + +// ============ Generic Enum Tests ============ + +#[test] +fn generic_enum_option_some() { + let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::Some(42) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Option::Some(42) should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_option_none() { + let src = r#" +enum Option { + Some(T), + None, +} +let x: Option = Option::None +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Option::None should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_option_some_string() { + let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::Some("hello") +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Option::Some(\"hello\") should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_option_some_bool() { + let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::Some(true) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Option::Some(true) should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_match() { + let src = r#" +enum Option { + Some(T), + None, +} +fn unwrap_or(opt: Option, default: i64) -> i64 { + match opt { + Option::Some(val) => val, + Option::None => default, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!(result.is_ok(), "match on 
generic enum: {:?}", result.err()); +} + +#[test] +fn generic_enum_result() { + let src = r#" +enum Result { + Ok(T), + Err(E), +} +fn check(r: Result) -> i64 { + match r { + Result::Ok(val) => val, + Result::Err(msg) => -1, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Result should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_as_return_type() { + let src = r#" +enum Option { + Some(T), + None, +} +fn make_some() -> Option { + return Option::Some(42) +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "generic enum as return type: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_as_param_type() { + let src = r#" +enum Option { + Some(T), + None, +} +fn is_some(opt: Option) -> i64 { + return match opt { + Option::Some(val) => 1, + Option::None => 0, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "generic enum as param type: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_none_without_annotation() { + // Unit variant of a generic enum without type annotation should produce + // a clear "type annotations needed" error, not a confusing AIR-level Opaque error. 
+ let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::None +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "Option::None without annotation should require type annotation" + ); + let errors = result.unwrap_err(); + let msg = format!("{:?}", errors); + assert!( + msg.contains("type annotations needed") || msg.contains("cannot infer"), + "error should mention type annotations: {}", + msg + ); +} + +#[test] +fn generic_enum_multiple_variants_in_function() { + let src = r#" +enum Option { + Some(T), + None, +} +fn test(flag: bool) -> Option { + if flag { + return Option::Some(42) + } + return Option::None +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "returning both Some and None: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_result_ok_construction() { + let src = r#" +enum Result { + Ok(T), + Err(E), +} +let r = Result::Ok(42) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Result::Ok(42) should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_result_err_construction() { + let src = r#" +enum Result { + Ok(T), + Err(E), +} +let r = Result::Err("something failed") +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Result::Err(\"...\") should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_air_lowering() { + let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::Some(42) +"#; + let air = lower_source(src); + // The generic enum def should exist (with type params) + let enum_def = air.enums.iter().find(|e| e.name.contains("Option")); + assert!( + enum_def.is_some(), + "Option enum should exist in AIR: {:?}", + air.enums.iter().map(|e| &e.name).collect::>() + ); +} + +#[test] +fn generic_enum_match_air_lowering() { + let src = r#" +enum Option { + Some(T), + None, +} +fn unwrap_or(opt: Option, default: i64) -> i64 { + return match opt { + Option::Some(val) => val, + Option::None 
=> default, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + // Should contain switch and enum_tag + assert!( + air_text.contains("switch"), + "match on generic enum should lower to switch, got:\n{}", + air_text + ); + assert!( + air_text.contains("enum_tag"), + "match should extract tag, got:\n{}", + air_text + ); + assert!( + air_text.contains("enum_payload"), + "data match should extract payload, got:\n{}", + air_text + ); +} + +#[test] +fn generic_enum_wrong_arg_type() { + // Option::Some expects one argument of type T. + // With `Some(42)`, T = i64. But we can't enforce that T must be i64 + // from a separate annotation without one -- this just tests that + // type checking works with the generic args. + let src = r#" +enum Option { + Some(T), + None, +} +let x = Option::Some(42) +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "generic enum construction should type-check: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_exhaustive_match() { + let src = r#" +enum Option { + Some(T), + None, +} +fn test(opt: Option) -> i64 { + return match opt { + Option::Some(v) => v, + Option::None => 0, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "exhaustive match on generic enum: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_non_exhaustive_match_error() { + let src = r#" +enum Option { + Some(T), + None, +} +fn test(opt: Option) -> i64 { + return match opt { + Option::Some(v) => v, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_err(), + "non-exhaustive match on generic enum should fail" + ); +} + +#[test] +fn generic_enum_match_with_wildcard() { + let src = r#" +enum Option { + Some(T), + None, +} +fn test(opt: Option) -> i64 { + return match opt { + Option::Some(v) => v, + _ => -1, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "generic enum match with wildcard: {:?}", + 
result.err() + ); +} + +#[test] +fn generic_enum_result_match_both_variants() { + let src = r#" +enum Result { + Ok(T), + Err(E), +} +fn handle(r: Result) -> i64 { + return match r { + Result::Ok(val) => val, + Result::Err(msg) => -1, + } +} +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "Result match both variants: {:?}", + result.err() + ); +} + +#[test] +fn nested_generic_enum_unit_variant_monomorphizes_nested_enum_def() { + let src = r#" +enum Pair { + Both(A, B), + Neither, +} + +enum Boxed { + Value(T), + Empty, +} + +fn get_empty() -> Boxed> { + return Boxed::Empty +} + +fn main() { + let e = get_empty() +} +"#; + let air = lower_and_monomorphize(src); + let pair = air + .enums + .iter() + .find(|e| e.name == "__mono_Pair_i64$str") + .expect("nested Pair mono enum should exist"); + assert_eq!(pair.variants[0].payload.len(), 2); + + let boxed = air + .enums + .iter() + .find(|e| e.name == "__mono_Boxed_enum___mono_Pair_i64$str") + .expect("Boxed> mono enum should exist"); + let value_variant = boxed + .variants + .iter() + .find(|v| v.name == "Value") + .expect("Value variant should exist"); + assert_eq!( + value_variant.payload, + vec![AirType::Enum("__mono_Pair_i64$str".to_string())] + ); +} + +#[test] +fn generic_enum_unit_variant_with_fnptr_type_arg_monomorphizes() { + let src = r#" +enum Holder { + Value(T), + Empty, +} + +fn apply_default() -> Holder i64> { + return Holder::Empty +} +"#; + let air = lower_and_monomorphize(src); + let air_text = print_program(&air); + + let holder = air + .enums + .iter() + .find(|e| e.name == "__mono_Holder_fnptr$i64$Ri64") + .expect("fnptr-instantiated Holder enum should exist"); + assert_eq!(holder.variants.len(), 2); + assert!( + !air_text.contains("enum_init Holder::"), + "fnptr generic unit variant should be rewritten to mono enum:\n{air_text}" + ); +} + +#[test] +fn generic_enum_named_fn_payload_uses_fnptr_monomorphization() { + let src = r#" +enum Holder { + Value(T), + Empty, +} + 
+fn inc(x: i64) -> i64 { + return x + 1 +} + +fn call_holder(h: Holder i64>) -> i64 { + return match h { + Holder::Value(f) => f(41) + Holder::Empty => 0 + } +} + +fn main() { + let h: Holder i64> = Holder::Value(inc) + call_holder(h) +} +"#; + let air = lower_and_monomorphize(src); + let air_text = print_program(&air); + + assert!( + air_text.contains("enum_init __mono_Holder_fnptr$i64$Ri64::Value"), + "named function payload should monomorphize to fnptr enum, got:\n{air_text}" + ); + assert!( + !air_text.contains("__mono_Holder_ptr_void"), + "named function payload must not degrade to ptr_void mono, got:\n{air_text}" + ); +} + +#[test] +fn generic_enum_result_air_lowering() { + let src = r#" +enum Result { + Ok(T), + Err(E), +} +fn handle(r: Result) -> i64 { + return match r { + Result::Ok(val) => val, + Result::Err(msg) => -1, + } +} +"#; + let air = lower_source(src); + let air_text = print_program(&air); + + assert!( + air_text.contains("switch"), + "Result match should lower to switch, got:\n{}", + air_text + ); + assert!( + air_text.contains("enum_payload"), + "Result match should extract payload, got:\n{}", + air_text + ); +} + +// ============ Multiple Instantiation Tests ============ + +#[test] +fn generic_enum_none_with_multiple_monos() { + let src = r#" +enum Option { + Some(T), + None, +} +let a = Option::Some(42) +let b = Option::Some("hello") +let c: Option = Option::None +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "None with multiple monos should work: {:?}", + result.err() + ); +} + +#[test] +fn generic_enum_none_with_multiple_monos_air() { + // Uses functions instead of top-level lets because top-level lets become + // globals (which don't emit EnumInit to AIR). 
+ let src = r#" +enum Option { + Some(T), + None, +} +fn make_int() -> Option { + return Option::Some(42) +} +fn make_str() -> Option { + return Option::Some("hello") +} +fn make_none_int() -> Option { + return Option::None +} +"#; + let air = lower_source(src); + let air = aelys_air::mono::monomorphize(air).unwrap(); + let air_text = print_program(&air); + + // After monomorphization, no generic enum definitions should remain + let remaining_generic = air.enums.iter().any(|e| !e.type_params.is_empty()); + assert!( + !remaining_generic, + "no generic enum defs should remain after mono, got:\n{}", + air_text + ); + + // The enum_init for None variant should reference a monomorphized name + // (not the raw "Option") + assert!( + !air_text.contains("enum_init Option::"), + "unit variant Option::None should be monomorphized, got:\n{}", + air_text + ); +} + +#[test] +fn generic_enum_unit_variant_multiple_monos_in_function() { + let src = r#" +enum Option { + Some(T), + None, +} +fn test() -> Option { + return Option::None +} +fn test2() -> Option { + return Option::None +} +let a = test() +let b = test2() +"#; + let result = compile_to_typed_ast(src); + assert!( + result.is_ok(), + "None in different typed functions should work: {:?}", + result.err() + ); +} diff --git a/aelys/tests/facade_api_smoke.rs b/aelys/tests/facade_api_smoke.rs deleted file mode 100644 index 50ba5c3..0000000 --- a/aelys/tests/facade_api_smoke.rs +++ /dev/null @@ -1,12 +0,0 @@ -use aelys_runtime::Value; - -#[test] -fn facade_smoke_compiles_and_runs() { - let result = aelys::api::run("1 + 2", "").expect("run should succeed"); - assert_eq!(result.to_string(), "3"); - - let mut vm = aelys::api::new_vm().expect("vm"); - aelys::api::run_with_vm(&mut vm, "fn f(x: int) -> int { x + 1 }", "").unwrap(); - let out = aelys::api::call_function(&mut vm, "f", &[Value::int(41)]).unwrap(); - assert_eq!(out.to_string(), "42"); -} diff --git a/aelys/tests/fatal_error_filter_tests.rs 
b/aelys/tests/fatal_error_filter_tests.rs new file mode 100644 index 0000000..3144915 --- /dev/null +++ b/aelys/tests/fatal_error_filter_tests.rs @@ -0,0 +1,355 @@ +/// Tests that programs which SHOULD compile aren't falsely rejected +/// by the all-fatal error filter in sema/entry.rs. +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; +use std::collections::HashSet; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_ok_with_builtins(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let builtins: HashSet = ["print", "println"].iter().map(|s| s.to_string()).collect(); + TypeInference::infer_program_with_imports(stmts, src, Default::default(), builtins).is_ok() +} + +#[test] +fn generic_identity_function() { + assert!( + sema_ok("fn id(x: T) -> T { return x }"), + "generic identity function should compile" + ); +} + +#[test] +fn generic_with_concrete_call() { + assert!( + sema_ok( + r#" +fn id(x: T) -> T { return x } +fn use_it() -> i64 { return id(42) } +"# + ), + "calling generic fn with concrete arg should compile" + ); +} + +#[test] +fn generic_with_multiple_instantiations() { + assert!( + sema_ok( + r#" +fn first(a: T, b: T) -> T { return a } +fn test() -> i64 { + let a = first(1, 2) + return a +} +"# + ), + "multiple generic instantiations should compile" + ); +} + +#[test] +fn simple_closure() { + assert!( + sema_ok( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { return f(x) } +fn test() -> i64 { + let double = fn(x: i64) -> i64 { return x * 2 } + return apply(double, 
5) +} +"# + ), + "simple closure should compile" + ); +} + +#[test] +fn closure_capturing_local() { + assert!( + sema_ok( + r#" +fn test() -> i64 { + let offset: i64 = 10 + let add_offset = fn(x: i64) -> i64 { return x + offset } + return add_offset(5) +} +"# + ), + "closure capturing local should compile" + ); +} + +#[test] +fn recursive_factorial() { + assert!( + sema_ok( + r#" +fn factorial(n: i64) -> i64 { + if n < 2 { + return 1 + } + return n * factorial(n - 1) +} +"# + ), + "recursive factorial should compile" + ); +} + +#[test] +fn mutual_recursion_style() { + // not truly mutual (would need forward decl) but tests that calling another function from within a function works + assert!( + sema_ok( + r#" +fn is_even(n: i64) -> bool { return n == 0 } +fn is_odd(n: i64) -> bool { + if n == 0 { return false } + return is_even(n - 1) +} +"# + ), + "pseudo-mutual recursion should compile" + ); +} + +#[test] +fn struct_with_methods_style() { + assert!( + sema_ok( + r#" +struct Point { x: i64, y: i64 } +fn point_sum(p: Point) -> i64 { + return p.x + p.y +} +fn test() -> i64 { + let p = Point { x: 1, y: 2 } + return point_sum(p) +} +"# + ), + "struct creation and field access should compile" + ); +} + +#[test] +fn nested_struct() { + assert!( + sema_ok( + r#" +struct Inner { val: i64 } +struct Outer { inner: Inner } +fn test() -> i64 { + let inner = Inner { val: 42 } + let outer = Outer { inner: inner } + return outer.inner.val +} +"# + ), + "nested struct should compile" + ); +} + +#[test] +fn string_len() { + assert!( + sema_ok( + r#" +fn str_len(s: string) -> i64 { return s.len } +"# + ), + "string .len should compile" + ); +} + +#[test] +fn string_concatenation() { + assert!( + sema_ok( + r#" +fn greet(name: string) -> string { return "Hello, " + name } +"# + ), + "string concatenation should compile" + ); +} + +#[test] +fn function_as_parameter() { + assert!( + sema_ok( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { return f(x) } +fn double(x: i64) -> i64 { 
return x * 2 } +fn test() -> i64 { return apply(double, 5) } +"# + ), + "passing function as parameter should compile" + ); +} + +#[test] +fn nested_if_with_returns() { + assert!( + sema_ok( + r#" +fn classify(x: i64) -> i64 { + if x > 0 { + if x > 100 { + return 3 + } + return 2 + } + if x == 0 { + return 1 + } + return 0 +} +"# + ), + "nested if with multiple returns should compile" + ); +} + +#[test] +fn while_with_early_return() { + assert!( + sema_ok( + r#" +fn find(arr: [i64; 10], n: i64, target: i64) -> i64 { + let mut i: i64 = 0 + while i < n { + if arr[i] == target { + return i + } + i = i + 1 + } + return -1 +} +"# + ), + "while with early return should compile" + ); +} + +#[test] +fn rejects_string_minus_string() { + assert!( + !sema_ok(r#"fn f() -> string { return "a" - "b" }"#), + "string subtraction should be rejected" + ); +} + +#[test] +fn rejects_bool_arithmetic() { + assert!( + !sema_ok("fn f() -> bool { return true + false }"), + "bool addition should be rejected" + ); +} + +#[test] +fn void_function_no_return() { + assert!( + sema_ok( + r#" +fn noop() -> void { + let x: i64 = 1 +} +"# + ), + "void function without return should compile" + ); +} + +#[test] +fn dynamic_type_unifies_with_anything() { + // println is registered as Dynamic via bootstrap builtins + // TODO: REMOVE WHEN BOOTSTRAPPING IS DONE. 
+ assert!( + sema_ok_with_builtins( + r#" +fn test() { println("hello") } +"# + ), + "Dynamic-typed println should accept string" + ); +} + +#[test] +fn println_does_not_satisfy_non_void_return() { + assert!( + !sema_ok_with_builtins( + r#" +fn main() -> i64 { + println("hello") +} +"# + ), + "println should not be accepted as implicit i64 return value" + ); +} + +#[test] +fn explicit_cast_i32_to_f64() { + assert!( + sema_ok("fn f(x: i32) -> f64 { return x as f64 }"), + "explicit cast should compile" + ); +} + +#[test] +fn explicit_cast_chain() { + assert!( + sema_ok("fn f(x: i64) -> i8 { return (x as i32) as i8 }"), + "chained casts should compile" + ); +} + +#[test] +fn generic_with_struct_name_collision_rejected() { + // struct T shadows type param T in instantiate_type_params, so identity(42) is a type error + assert!( + !sema_ok( + r#" +struct T { value: i64 } +fn identity(x: T) -> T { return x } +fn test() -> i64 { return identity(42) } +"# + ), + "struct T shadows type param T, so identity(42) should be rejected" + ); +} + +#[test] +fn generic_struct_with_type_param_name_collision_rejected() { + // struct T shadows type param T in Box, so Box { inner: 42 } is a type error + assert!( + !sema_ok( + r#" +struct T { value: i64 } +struct Box { inner: T } +fn test() -> i64 { + let b = Box { inner: 42 } + return b.inner +} +"# + ), + "struct T shadows type param T in generic struct, should be rejected" + ); +} diff --git a/aelys/tests/fixtures/native_cycle_a/Cargo.toml b/aelys/tests/fixtures/native_cycle_a/Cargo.toml deleted file mode 100644 index a6c8cd2..0000000 --- a/aelys/tests/fixtures/native_cycle_a/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-cycle-a" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_cycle_a/src/lib.rs b/aelys/tests/fixtures/native_cycle_a/src/lib.rs deleted file mode 
100644 index f28a5b6..0000000 --- a/aelys/tests/fixtures/native_cycle_a/src/lib.rs +++ /dev/null @@ -1,55 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysRequiredModule, - AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"cycle_a\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"a\0"; - -static DEP_NAME: &[u8] = b"cycle_b\0"; - -extern "C" fn cycle_a_fn( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(1); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: cycle_a_fn as *const c_void, -}]; - -static REQUIRED: [AelysRequiredModule; 1] = [AelysRequiredModule { - name: DEP_NAME.as_ptr() as *const i8, - version_req: core::ptr::null(), -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: REQUIRED.len() as u32, - required_modules: REQUIRED.as_ptr(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_cycle_b/Cargo.toml b/aelys/tests/fixtures/native_cycle_b/Cargo.toml deleted file mode 100644 index cd413d4..0000000 --- a/aelys/tests/fixtures/native_cycle_b/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-cycle-b" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - 
-[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_cycle_b/src/lib.rs b/aelys/tests/fixtures/native_cycle_b/src/lib.rs deleted file mode 100644 index cd2dd8b..0000000 --- a/aelys/tests/fixtures/native_cycle_b/src/lib.rs +++ /dev/null @@ -1,55 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysRequiredModule, - AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"cycle_b\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"b\0"; - -static DEP_NAME: &[u8] = b"cycle_a\0"; - -extern "C" fn cycle_b_fn( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(2); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: cycle_b_fn as *const c_void, -}]; - -static REQUIRED: [AelysRequiredModule; 1] = [AelysRequiredModule { - name: DEP_NAME.as_ptr() as *const i8, - version_req: core::ptr::null(), -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: REQUIRED.len() as u32, - required_modules: REQUIRED.as_ptr(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_dep_a/Cargo.toml b/aelys/tests/fixtures/native_dep_a/Cargo.toml deleted file mode 100644 index 
2f4facd..0000000 --- a/aelys/tests/fixtures/native_dep_a/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-dep-a" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_dep_a/src/lib.rs b/aelys/tests/fixtures/native_dep_a/src/lib.rs deleted file mode 100644 index 7e6865d..0000000 --- a/aelys/tests/fixtures/native_dep_a/src/lib.rs +++ /dev/null @@ -1,56 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysRequiredModule, - AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"dep_a\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"a\0"; - -static DEP_NAME: &[u8] = b"dep_b\0"; -static DEP_VERSION: &[u8] = b">=1.0.0\0"; - -extern "C" fn dep_a_fn( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(10); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: dep_a_fn as *const c_void, -}]; - -static REQUIRED: [AelysRequiredModule; 1] = [AelysRequiredModule { - name: DEP_NAME.as_ptr() as *const i8, - version_req: DEP_VERSION.as_ptr() as *const i8, -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: REQUIRED.len() as u32, - 
required_modules: REQUIRED.as_ptr(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_dep_b/Cargo.toml b/aelys/tests/fixtures/native_dep_b/Cargo.toml deleted file mode 100644 index f419b3b..0000000 --- a/aelys/tests/fixtures/native_dep_b/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-dep-b" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_dep_b/src/lib.rs b/aelys/tests/fixtures/native_dep_b/src/lib.rs deleted file mode 100644 index 35a4a08..0000000 --- a/aelys/tests/fixtures/native_dep_b/src/lib.rs +++ /dev/null @@ -1,47 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"dep_b\0"; -static MODULE_VERSION: &[u8] = b"1.0.0\0"; -static EXPORT_NAME: &[u8] = b"id\0"; - -extern "C" fn dep_b_id( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(1); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: dep_b_id as *const c_void, -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: 
None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_dep_c/Cargo.toml b/aelys/tests/fixtures/native_dep_c/Cargo.toml deleted file mode 100644 index b110a97..0000000 --- a/aelys/tests/fixtures/native_dep_c/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-dep-c" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_dep_c/src/lib.rs b/aelys/tests/fixtures/native_dep_c/src/lib.rs deleted file mode 100644 index 4e58924..0000000 --- a/aelys/tests/fixtures/native_dep_c/src/lib.rs +++ /dev/null @@ -1,56 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysRequiredModule, - AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"dep_c\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"c\0"; - -static DEP_NAME: &[u8] = b"dep_b\0"; -static DEP_VERSION: &[u8] = b">=2.0.0\0"; - -extern "C" fn dep_c_fn( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(20); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: dep_c_fn as *const c_void, -}]; - -static REQUIRED: [AelysRequiredModule; 1] = [AelysRequiredModule { - name: DEP_NAME.as_ptr() as *const i8, - version_req: DEP_VERSION.as_ptr() as *const i8, -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: 
core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: REQUIRED.len() as u32, - required_modules: REQUIRED.as_ptr(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_hot_a/Cargo.toml b/aelys/tests/fixtures/native_hot_a/Cargo.toml deleted file mode 100644 index 0d47de9..0000000 --- a/aelys/tests/fixtures/native_hot_a/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-hot-a" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_hot_a/src/lib.rs b/aelys/tests/fixtures/native_hot_a/src/lib.rs deleted file mode 100644 index d19a4bf..0000000 --- a/aelys/tests/fixtures/native_hot_a/src/lib.rs +++ /dev/null @@ -1,47 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"hot_mod\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"value\0"; - -extern "C" fn hot_value( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(1); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: hot_value as *const c_void, -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - 
vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_hot_b/Cargo.toml b/aelys/tests/fixtures/native_hot_b/Cargo.toml deleted file mode 100644 index 14a8834..0000000 --- a/aelys/tests/fixtures/native_hot_b/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-hot-b" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_hot_b/src/lib.rs b/aelys/tests/fixtures/native_hot_b/src/lib.rs deleted file mode 100644 index d955b59..0000000 --- a/aelys/tests/fixtures/native_hot_b/src/lib.rs +++ /dev/null @@ -1,69 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"hot_mod\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"value\0"; -static EXTRA_NAME: &[u8] = b"extra\0"; - -extern "C" fn hot_value( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(2); - } - 0 -} - -extern "C" fn hot_extra( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(3); - } - 0 -} - -static EXPORTS: [AelysExport; 2] = [ - AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: hot_value as *const c_void, - }, - AelysExport { - name: EXTRA_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: hot_extra as *const c_void, 
- }, -]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_tamper/Cargo.toml b/aelys/tests/fixtures/native_tamper/Cargo.toml deleted file mode 100644 index 1ef49b5..0000000 --- a/aelys/tests/fixtures/native_tamper/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-tamper" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_tamper/src/lib.rs b/aelys/tests/fixtures/native_tamper/src/lib.rs deleted file mode 100644 index e2b633e..0000000 --- a/aelys/tests/fixtures/native_tamper/src/lib.rs +++ /dev/null @@ -1,45 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"tamper\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"ok\0"; - -extern "C" fn tamper_ok( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(7); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: tamper_ok as *const c_void, -}]; - -#[unsafe(no_mangle)] -pub 
static aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 1, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: None, -}; diff --git a/aelys/tests/fixtures/native_test/Cargo.toml b/aelys/tests/fixtures/native_test/Cargo.toml deleted file mode 100644 index cd7e888..0000000 --- a/aelys/tests/fixtures/native_test/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-test" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_test/src/lib.rs b/aelys/tests/fixtures/native_test/src/lib.rs deleted file mode 100644 index effc020..0000000 --- a/aelys/tests/fixtures/native_test/src/lib.rs +++ /dev/null @@ -1,45 +0,0 @@ -use aelys_native::{AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, value_int}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"native_test\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"add\0"; - -extern "C" fn test_add( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - _out: *mut AelysValue, -) -> i32 { - unsafe { - *_out = value_int(10); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 2, - _padding: [0; 3], - value: test_add as *const c_void, -}]; - -#[unsafe(no_mangle)] -pub static mut aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - 
descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: None, -}; - -aelys_native::aelys_init_exports_hash!(aelys_module_descriptor); diff --git a/aelys/tests/fixtures/native_zero_hash/Cargo.toml b/aelys/tests/fixtures/native_zero_hash/Cargo.toml deleted file mode 100644 index 54d4391..0000000 --- a/aelys/tests/fixtures/native_zero_hash/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "aelys-native-zero-hash" -version = "0.1.0" -edition = "2024" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -aelys-native = { path = "../../../../native" } - -[workspace] diff --git a/aelys/tests/fixtures/native_zero_hash/src/lib.rs b/aelys/tests/fixtures/native_zero_hash/src/lib.rs deleted file mode 100644 index 83db06d..0000000 --- a/aelys/tests/fixtures/native_zero_hash/src/lib.rs +++ /dev/null @@ -1,45 +0,0 @@ -use aelys_native::{ - value_int, AelysExport, AelysExportKind, AelysModuleDescriptor, AelysValue, AELYS_ABI_VERSION, -}; -use core::ffi::c_void; - -static MODULE_NAME: &[u8] = b"zero_hash\0"; -static MODULE_VERSION: &[u8] = b"0.1.0\0"; -static EXPORT_NAME: &[u8] = b"zero\0"; - -extern "C" fn zero_hash_fn( - _vm: *mut c_void, - _args: *const AelysValue, - _arg_count: usize, - out: *mut AelysValue, -) -> i32 { - unsafe { - *out = value_int(0); - } - 0 -} - -static EXPORTS: [AelysExport; 1] = [AelysExport { - name: EXPORT_NAME.as_ptr() as *const i8, - kind: AelysExportKind::Function, - arity: 0, - _padding: [0; 3], - value: zero_hash_fn as *const c_void, -}]; - -#[unsafe(no_mangle)] -pub static aelys_module_descriptor: AelysModuleDescriptor = AelysModuleDescriptor { - abi_version: AELYS_ABI_VERSION, - 
descriptor_size: core::mem::size_of::() as u32, - module_name: MODULE_NAME.as_ptr() as *const i8, - module_version: MODULE_VERSION.as_ptr() as *const i8, - vm_version_min: core::ptr::null(), - vm_version_max: core::ptr::null(), - descriptor_hash: 0, - exports_hash: 0, - export_count: EXPORTS.len() as u32, - exports: EXPORTS.as_ptr(), - required_module_count: 0, - required_modules: core::ptr::null(), - init: None, -}; diff --git a/aelys/tests/fmt_string_tests.rs b/aelys/tests/fmt_string_tests.rs deleted file mode 100644 index 8962175..0000000 --- a/aelys/tests/fmt_string_tests.rs +++ /dev/null @@ -1,237 +0,0 @@ -use aelys::run; - -fn run_ok(source: &str) -> aelys_runtime::Value { - run(source, "test.aelys").expect("Expected program to run successfully") -} - -fn run_err(source: &str) -> String { - match run(source, "test.aelys") { - Ok(_) => panic!("Expected program to fail, but it succeeded"), - Err(e) => format!("{}", e), - } -} - -#[test] -fn inline_interpolation_simple() { - let result = run_ok( - r#" - let name = "world" - "hello {name}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn inline_interpolation_expression() { - let result = run_ok( - r#" - let x = 5 - "x + 1 = {x + 1}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn placeholder_needs_args() { - // placeholder {} without arguments should error at compile time - let err = run_err( - r#" - let s = "value: {}" - s - "#, - ); - assert!(err.contains("placeholder") || err.contains("argument")); -} - -#[test] -fn escape_double_braces() { - let result = run_ok( - r#" - "JSON: {{key}}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn mixed_literals_and_expressions() { - let result = run_ok( - r#" - let a = 1 - let b = 2 - "a={a}, b={b}, sum={a + b}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn empty_format_string() { - let result = run_ok( - r#" - "" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn 
no_interpolation_fallback_to_string() { - let result = run_ok( - r#" - "hello world" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn tostring_builtin_exists() { - // __tostring should be available as a builtin - let result = run_ok( - r#" - __tostring(42) - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn tostring_converts_int() { - let result = run_ok( - r#" - let s = __tostring(123) - s - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn tostring_converts_float() { - let result = run_ok( - r#" - let s = __tostring(3.14) - s - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn tostring_converts_bool() { - let result = run_ok( - r#" - let s = __tostring(true) - s - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn nested_braces_in_expr() { - // expression containing braces (like a dict literal in future) - // for now just test that balanced braces work - let result = run_ok( - r#" - let arr = [1, 2, 3] - "arr = {arr}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn fmt_string_with_function_call() { - let result = run_ok( - r#" - fn double(x) { x * 2 } - "doubled: {double(5)}" - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn error_unterminated_expr() { - let err = run_err( - r#" - "test {x" - "#, - ); - assert!(err.contains("unterminated") || err.contains("}")); -} - -#[test] -fn error_unmatched_close_brace() { - let err = run_err( - r#" - "test }" - "#, - ); - assert!(err.contains("unmatched") || err.contains("}")); -} - -// Placeholder syntax at call site: func("fmt {}", arg) - -#[test] -fn placeholder_in_call_single() { - let result = run_ok( - r#" - fn identity(s) { s } - identity("value: {}", 42) - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn placeholder_in_call_multiple() { - let result = run_ok( - r#" - fn identity(s) { s } - let x = 10 - let y = 20 - identity("x={}, y={}", x, y) - "#, - ); - 
assert!(result.as_ptr().is_some()); -} - -#[test] -fn placeholder_in_call_with_extra_args() { - // func("fmt {}", val, extra_arg) - extra_arg goes to func - let result = run_ok( - r#" - fn take_two(s, n) { s } - take_two("num: {}", 42, 99) - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn placeholder_mixed_with_inline() { - let result = run_ok( - r#" - fn identity(s) { s } - let name = "Reimu" - identity("hello {name}, your number is {}", 7) - "#, - ); - assert!(result.as_ptr().is_some()); -} - -#[test] -fn placeholder_not_enough_args() { - let err = run_err( - r#" - fn identity(s) { s } - identity("a={}, b={}", 1) - "#, - ); - assert!(err.contains("placeholder") || err.contains("argument")); -} diff --git a/aelys/tests/frame_tests.rs b/aelys/tests/frame_tests.rs deleted file mode 100644 index 34990dd..0000000 --- a/aelys/tests/frame_tests.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! Tests for Aelys VM call frames - -use aelys_runtime::{CallFrame, GcRef}; -use std::ptr; - -#[test] -fn test_call_frame_creation() { - let gc_ref = GcRef::new(5); - let frame = CallFrame::new(gc_ref, 10, ptr::null(), 0, ptr::null(), 0, 0); - - assert_eq!(frame.function(), gc_ref); - assert_eq!(frame.ip(), 0); - assert_eq!(frame.base(), 10); -} - -#[test] -fn test_advance_ip() { - let gc_ref = GcRef::new(0); - let mut frame = CallFrame::new(gc_ref, 0, ptr::null(), 0, ptr::null(), 0, 0); - - assert_eq!(frame.ip(), 0); - - frame.advance_ip(); - assert_eq!(frame.ip(), 1); - - frame.advance_ip(); - assert_eq!(frame.ip(), 2); -} - -#[test] -fn test_set_ip() { - let gc_ref = GcRef::new(0); - let mut frame = CallFrame::new(gc_ref, 0, ptr::null(), 0, ptr::null(), 0, 0); - - frame.set_ip(42); - assert_eq!(frame.ip(), 42); - - frame.set_ip(0); - assert_eq!(frame.ip(), 0); -} - -#[test] -fn test_jump_forward() { - let gc_ref = GcRef::new(0); - let mut frame = CallFrame::new(gc_ref, 0, ptr::null(), 0, ptr::null(), 0, 0); - - frame.set_ip(10); - frame.jump(5); - assert_eq!(frame.ip(), 
15); -} - -#[test] -fn test_jump_backward() { - let gc_ref = GcRef::new(0); - let mut frame = CallFrame::new(gc_ref, 0, ptr::null(), 0, ptr::null(), 0, 0); - - frame.set_ip(10); - frame.jump(-3); - assert_eq!(frame.ip(), 7); -} - -#[test] -fn test_jump_backward_saturating() { - let gc_ref = GcRef::new(0); - let mut frame = CallFrame::new(gc_ref, 0, ptr::null(), 0, ptr::null(), 0, 0); - - frame.set_ip(2); - frame.jump(-10); - assert_eq!(frame.ip(), 0); // Saturates at 0 -} - -#[test] -fn test_register_index() { - let gc_ref = GcRef::new(0); - let frame = CallFrame::new(gc_ref, 100, ptr::null(), 0, ptr::null(), 0, 0); - - assert_eq!(frame.register_index(0), Some(100)); - assert_eq!(frame.register_index(1), Some(101)); - assert_eq!(frame.register_index(5), Some(105)); - assert_eq!(frame.register_index(255), Some(355)); -} - -#[test] -fn test_register_index_overflow() { - let gc_ref = GcRef::new(0); - let frame = CallFrame::new(gc_ref, usize::MAX - 10, ptr::null(), 0, ptr::null(), 0, 0); - - assert_eq!(frame.register_index(10), Some(usize::MAX)); - assert_eq!(frame.register_index(11), None); - assert_eq!(frame.register_index(255), None); -} - -#[test] -fn test_clone() { - let gc_ref = GcRef::new(42); - let frame1 = CallFrame::new(gc_ref, 10, ptr::null(), 0, ptr::null(), 0, 0); - let mut frame2 = frame1.clone(); - - assert_eq!(frame1.function(), frame2.function()); - assert_eq!(frame1.ip(), frame2.ip()); - assert_eq!(frame1.base(), frame2.base()); - - // Modifying clone doesn't affect original - frame2.advance_ip(); - assert_eq!(frame1.ip(), 0); - assert_eq!(frame2.ip(), 1); -} diff --git a/aelys/tests/function_call_api_tests.rs b/aelys/tests/function_call_api_tests.rs deleted file mode 100644 index ad9d498..0000000 --- a/aelys/tests/function_call_api_tests.rs +++ /dev/null @@ -1,260 +0,0 @@ -use aelys::{call_function, get_function, new_vm, run_with_vm}; -use aelys_runtime::Value; - -#[test] -fn test_call_function_simple() { - let mut vm = new_vm().unwrap(); - 
run_with_vm(&mut vm, "fn add(a, b) { a + b }", "def").unwrap(); - - let result = call_function(&mut vm, "add", &[Value::int(10), Value::int(32)]).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_call_function_no_args() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn get_answer() { 42 }", "def").unwrap(); - - let result = call_function(&mut vm, "get_answer", &[]).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_call_function_float() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn mul(a, b) { a * b }", "def").unwrap(); - - let result = call_function(&mut vm, "mul", &[Value::float(2.5), Value::float(4.0)]).unwrap(); - assert_eq!(result.as_float(), Some(10.0)); -} - -#[test] -fn test_call_function_not_found() { - let mut vm = new_vm().unwrap(); - - let err = call_function(&mut vm, "nonexistent", &[]); - assert!(err.is_err()); - let msg = err.unwrap_err().to_string(); - assert!(msg.contains("nonexistent") || msg.contains("not found")); -} - -#[test] -fn test_call_function_arity_mismatch() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn need_two(a, b) { a + b }", "def").unwrap(); - - let err = call_function(&mut vm, "need_two", &[Value::int(1)]); - assert!(err.is_err()); - let msg = err.unwrap_err().to_string(); - assert!(msg.contains("expected 2") || msg.contains("arity")); -} - -#[test] -fn test_get_function_basic() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn double(x) { x * 2 }", "def").unwrap(); - - let double = get_function(&vm, "double").unwrap(); - let result = double.call(&mut vm, &[Value::int(21)]).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_get_function_repeated_calls() { - let mut vm = new_vm().unwrap(); - run_with_vm( - &mut vm, - "fn lcg(state) { (state * 1103515245 + 12345) & 0x7FFFFFFF }", - "def", - ) - .unwrap(); - - let rng = get_function(&vm, "lcg").unwrap(); - - let mut state = Value::int(42); - for _ in 
0..1000 { - state = rng.call(&mut vm, &[state]).unwrap(); - } - - assert!(state.as_int().is_some()); - assert!(state.as_int().unwrap() > 0); -} - -#[test] -fn test_get_function_not_found() { - let vm = new_vm().unwrap(); - - let err = get_function(&vm, "nonexistent"); - assert!(err.is_err()); -} - -#[test] -fn test_call_function_with_globals() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "let mut counter = 0", "init").unwrap(); - run_with_vm(&mut vm, "fn increment(n) { counter += n; counter }", "def").unwrap(); - - let result1 = call_function(&mut vm, "increment", &[Value::int(5)]).unwrap(); - assert_eq!(result1.as_int(), Some(5)); - - let result2 = call_function(&mut vm, "increment", &[Value::int(3)]).unwrap(); - assert_eq!(result2.as_int(), Some(8)); -} - -#[test] -fn test_call_function_recursive() { - let mut vm = new_vm().unwrap(); - run_with_vm( - &mut vm, - r#" -fn factorial(n) { - if n <= 1 { - return 1 - } - return n * factorial(n - 1) -} -"#, - "def", - ) - .unwrap(); - - let result = call_function(&mut vm, "factorial", &[Value::int(10)]).unwrap(); - assert_eq!(result.as_int(), Some(3628800)); -} - -#[test] -fn test_call_closure() { - let mut vm = new_vm().unwrap(); - run_with_vm( - &mut vm, - r#" -fn make_adder(n) { - fn adder(x) { x + n } - adder -} -let add_10 = make_adder(10) -"#, - "def", - ) - .unwrap(); - - let result = call_function(&mut vm, "add_10", &[Value::int(32)]).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_callable_function_copy() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn id(x) { x }", "def").unwrap(); - - let f1 = get_function(&vm, "id").unwrap(); - let f2 = f1.clone(); - - let r1 = f1.call(&mut vm, &[Value::int(1)]).unwrap(); - let r2 = f2.call(&mut vm, &[Value::int(2)]).unwrap(); - - assert_eq!(r1.as_int(), Some(1)); - assert_eq!(r2.as_int(), Some(2)); -} - -#[test] -fn test_call_function_returns_null() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn nothing() 
{ }", "def").unwrap(); - - let result = call_function(&mut vm, "nothing", &[]).unwrap(); - assert!(result.is_null()); -} - -#[test] -fn test_call_function_bool_return() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn is_positive(x) { x > 0 }", "def").unwrap(); - - let pos = call_function(&mut vm, "is_positive", &[Value::int(5)]).unwrap(); - let neg = call_function(&mut vm, "is_positive", &[Value::int(-5)]).unwrap(); - - assert_eq!(pos.as_bool(), Some(true)); - assert_eq!(neg.as_bool(), Some(false)); -} - -#[test] -fn test_call_function_mixed_types() { - let mut vm = new_vm().unwrap(); - run_with_vm( - &mut vm, - r#" -fn check(flag, x) { - if flag { x * 2 } else { x + 1 } -} -"#, - "def", - ) - .unwrap(); - - let r1 = call_function(&mut vm, "check", &[Value::bool(true), Value::int(10)]).unwrap(); - let r2 = call_function(&mut vm, "check", &[Value::bool(false), Value::int(10)]).unwrap(); - - assert_eq!(r1.as_int(), Some(20)); - assert_eq!(r2.as_int(), Some(11)); -} - -#[test] -fn test_callable_function_introspection() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn id(x) { x }", "def").unwrap(); - - let f = get_function(&vm, "id").unwrap(); - - assert_eq!(f.arity(), 1); - assert!(!f.is_native()); - assert!(!f.is_closure()); -} - -#[test] -fn test_performance_call_function_vs_run() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn inc(x) { x + 1 }", "def").unwrap(); - - let inc = get_function(&vm, "inc").unwrap(); - - let mut sum = 0i64; - for i in 0..10000 { - let result = inc.call(&mut vm, &[Value::int(i)]).unwrap(); - sum += result.as_int().unwrap(); - } - - // sum of (i+1) for i in 0..10000 = sum of 1..10001 = 10000*10001/2 = 50005000 - assert_eq!(sum, 50005000); -} - -#[test] -fn test_cached_vs_uncached_performance() { - let mut vm = new_vm().unwrap(); - run_with_vm(&mut vm, "fn double(x) { x * 2 }", "def").unwrap(); - - // Uncached: HashMap lookup each time - let start = std::time::Instant::now(); - for i in 0..100_000 
{ - let _ = call_function(&mut vm, "double", &[Value::int(i)]).unwrap(); - } - let uncached_time = start.elapsed(); - - // Cached: No lookups after get_function - let double = get_function(&vm, "double").unwrap(); - let start = std::time::Instant::now(); - for i in 0..100_000 { - let _ = double.call(&mut vm, &[Value::int(i)]).unwrap(); - } - let cached_time = start.elapsed(); - - // Cached should be faster (or at least not slower) - // We don't assert a specific ratio since it depends on the machine, - // but print for manual inspection - eprintln!( - "100k calls: uncached={:?}, cached={:?}, speedup={:.2}x", - uncached_time, - cached_time, - uncached_time.as_nanos() as f64 / cached_time.as_nanos() as f64 - ); -} diff --git a/aelys/tests/global_layout_tests.rs b/aelys/tests/global_layout_tests.rs deleted file mode 100644 index d185a35..0000000 --- a/aelys/tests/global_layout_tests.rs +++ /dev/null @@ -1,28 +0,0 @@ -use aelys_runtime::GlobalLayout; -use std::sync::Arc; - -#[test] -fn global_layout_interns_by_names() { - let a = GlobalLayout::new(vec!["alpha".to_string(), "beta".to_string()]); - let b = GlobalLayout::new(vec!["alpha".to_string(), "beta".to_string()]); - assert!(Arc::ptr_eq(&a, &b)); - assert_eq!(a.id(), b.id()); -} - -#[test] -fn global_layout_ids_unique_for_different_names() { - let a = GlobalLayout::new(vec!["alpha".to_string()]); - let b = GlobalLayout::new(vec!["beta".to_string()]); - assert_ne!(a.id(), 0); - assert_ne!(b.id(), 0); - assert_ne!(a.id(), b.id()); -} - -#[test] -fn global_layout_empty_is_singleton() { - let a = GlobalLayout::empty(); - let b = GlobalLayout::empty(); - assert!(Arc::ptr_eq(&a, &b)); - assert_eq!(a.id(), 0); - assert_eq!(b.id(), 0); -} diff --git a/aelys/tests/heap_tests.rs b/aelys/tests/heap_tests.rs deleted file mode 100644 index 143ad95..0000000 --- a/aelys/tests/heap_tests.rs +++ /dev/null @@ -1,205 +0,0 @@ -//! 
Tests for the Aelys Heap (garbage collector) - -use aelys_runtime::{Function, Heap, ObjectKind}; - -#[test] -fn test_heap_creation() { - let heap = Heap::new(); - assert_eq!(heap.object_count(), 0); - assert_eq!(heap.bytes_allocated(), 0); - assert!(heap.next_gc_threshold() > 0); -} - -#[test] -fn test_alloc_string() { - let mut heap = Heap::new(); - let gc_ref = heap.alloc_string("hello"); - - assert_eq!(heap.object_count(), 1); - assert!(heap.bytes_allocated() > 0); - - let obj = heap.get(gc_ref).expect("Object should exist"); - match &obj.kind { - ObjectKind::String(s) => { - assert_eq!(s.as_str(), "hello"); - } - _ => panic!("Expected String object"), - } -} - -#[test] -fn test_intern_string() { - let mut heap = Heap::new(); - - // Intern same string twice - let ref1 = heap.intern_string("test"); - let ref2 = heap.intern_string("test"); - - // Should return same reference - assert_eq!(ref1, ref2); - assert_eq!(heap.object_count(), 1); -} - -#[test] -fn test_intern_different_strings() { - let mut heap = Heap::new(); - - let ref1 = heap.intern_string("hello"); - let ref2 = heap.intern_string("world"); - - // Should be different references - assert_ne!(ref1, ref2); - assert_eq!(heap.object_count(), 2); -} - -#[test] -fn test_alloc_function() { - let mut heap = Heap::new(); - let func = Function::new(Some("test".to_string()), 2); - let gc_ref = heap.alloc_function(func); - - let obj = heap.get(gc_ref).expect("Object should exist"); - match &obj.kind { - ObjectKind::Function(f) => { - assert_eq!(f.name(), Some("test")); - assert_eq!(f.arity(), 2); - } - _ => panic!("Expected Function object"), - } -} - -#[test] -fn test_alloc_native() { - let mut heap = Heap::new(); - let gc_ref = heap.alloc_native("test_fn", 0); - - let obj = heap.get(gc_ref).expect("Object should exist"); - match &obj.kind { - ObjectKind::Native(n) => { - assert_eq!(n.name, "test_fn"); - assert_eq!(n.arity, 0); - } - _ => panic!("Expected Native object"), - } -} - -#[test] -fn 
test_mark_and_sweep() { - let mut heap = Heap::new(); - - // Allocate some objects - let ref1 = heap.alloc_string("keep me"); - let _ref2 = heap.alloc_string("free me"); - let ref3 = heap.alloc_string("keep me too"); - - assert_eq!(heap.object_count(), 3); - let initial_bytes = heap.bytes_allocated(); - - // Mark only ref1 and ref3 as reachable - heap.mark(ref1); - heap.mark(ref3); - - // Sweep should free ref2 - let freed = heap.sweep(); - - assert_eq!(freed, 1); - assert_eq!(heap.object_count(), 2); - assert!(heap.bytes_allocated() < initial_bytes); - - // Verify kept objects are still accessible - assert!(heap.get(ref1).is_some()); - assert!(heap.get(ref3).is_some()); -} - -#[test] -fn test_gc_threshold() { - let mut heap = Heap::new(); - - let initial_threshold = heap.next_gc_threshold(); - - // Allocate until we approach threshold - for _ in 0..100 { - heap.alloc_string("test string to fill heap"); - } - - // Should consider collecting - if heap.should_collect() { - // After sweep, threshold should grow - heap.sweep(); - assert!(heap.next_gc_threshold() >= heap.bytes_allocated()); - } - - assert!(heap.next_gc_threshold() >= initial_threshold); -} - -#[test] -fn test_free_list_reuse() { - let mut heap = Heap::new(); - - // Allocate objects - let ref1 = heap.alloc_string("first"); - let ref2 = heap.alloc_string("second"); - - assert_eq!(heap.object_count(), 2); - - // Mark only ref1, sweep ref2 - heap.mark(ref1); - heap.sweep(); - - assert_eq!(heap.object_count(), 1); - - // Allocate new object - should reuse freed slot - let ref3 = heap.alloc_string("third"); - - // ref3 should have reused ref2's slot - assert_eq!(ref3.index(), ref2.index()); - assert_eq!(heap.object_count(), 2); -} - -#[test] -fn test_fnv1a_hash() { - // Test FNV-1a hash function - let hash1 = Heap::fnv1a_hash(b"hello"); - let hash2 = Heap::fnv1a_hash(b"hello"); - let hash3 = Heap::fnv1a_hash(b"world"); - - // Same input produces same hash - assert_eq!(hash1, hash2); - - // Different input 
produces different hash (usually) - assert_ne!(hash1, hash3); -} - -#[test] -fn test_get_mut() { - let mut heap = Heap::new(); - let gc_ref = heap.alloc_string("test"); - - // Get mutable reference and mark - if let Some(obj) = heap.get_mut(gc_ref) { - assert!(!obj.marked); - obj.marked = true; - } - - // Verify mark persisted - let obj = heap.get(gc_ref).unwrap(); - assert!(obj.marked); -} - -#[test] -fn test_sweep_updates_threshold() { - let mut heap = Heap::new(); - - // Allocate and free all objects - let _ref1 = heap.alloc_string("benchmarks"); - - // Sweep without marking anything - heap.sweep(); - - // All objects freed - assert_eq!(heap.object_count(), 0); - assert_eq!(heap.bytes_allocated(), 0); - - // Threshold should be at least initial - assert!(heap.next_gc_threshold() >= Heap::INITIAL_GC_THRESHOLD); -} diff --git a/aelys/tests/implicit_widening_tests.rs b/aelys/tests/implicit_widening_tests.rs new file mode 100644 index 0000000..333e15a --- /dev/null +++ b/aelys/tests/implicit_widening_tests.rs @@ -0,0 +1,193 @@ +use aelys_air::lower::lower; +use aelys_air::{AirProgram, AirStmtKind, AirType, Rvalue}; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_sema::types::InferType; +use aelys_syntax::Source; + +fn source_to_air(code: &str) -> AirProgram { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()) + .scan() + .expect("should never happen"); + let ast = Parser::new(tokens, src.clone()) + .parse() + .expect("meow meow meow "); + let typed = TypeInference::infer_program(ast, src).expect("sema failed"); + lower(&typed) +} + +fn collect_casts(air: &AirProgram) -> Vec<(&AirType, &AirType)> { + let mut casts = Vec::new(); + for func in &air.functions { + for block in &func.blocks { + for stmt in &block.stmts { + if let AirStmtKind::Assign { + rvalue: Rvalue::Cast { from, to, .. }, + .. 
+ } = &stmt.kind + { + casts.push((from, to)); + } + } + } + } + casts +} + +#[test] +fn signed_to_wider_signed() { + assert!(InferType::I8.can_implicit_widen_to(&InferType::I16)); + assert!(InferType::I8.can_implicit_widen_to(&InferType::I32)); + assert!(InferType::I8.can_implicit_widen_to(&InferType::I64)); + assert!(InferType::I16.can_implicit_widen_to(&InferType::I32)); + assert!(InferType::I16.can_implicit_widen_to(&InferType::I64)); + assert!(InferType::I32.can_implicit_widen_to(&InferType::I64)); +} + +#[test] +fn unsigned_to_wider_unsigned() { + assert!(InferType::U8.can_implicit_widen_to(&InferType::U16)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::U32)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::U64)); + assert!(InferType::U16.can_implicit_widen_to(&InferType::U32)); + assert!(InferType::U16.can_implicit_widen_to(&InferType::U64)); + assert!(InferType::U32.can_implicit_widen_to(&InferType::U64)); +} + +#[test] +fn unsigned_to_wider_signed() { + assert!(InferType::U8.can_implicit_widen_to(&InferType::I16)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::I32)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::I64)); + assert!(InferType::U16.can_implicit_widen_to(&InferType::I32)); + assert!(InferType::U16.can_implicit_widen_to(&InferType::I64)); + assert!(InferType::U32.can_implicit_widen_to(&InferType::I64)); +} + +#[test] +fn small_int_to_float() { + // ≤16-bit ints => f32 (24-bit mantissa) + assert!(InferType::I8.can_implicit_widen_to(&InferType::F32)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::F32)); + assert!(InferType::I16.can_implicit_widen_to(&InferType::F32)); + assert!(InferType::U16.can_implicit_widen_to(&InferType::F32)); + + // ≤32-bit ints => f64 (53-bit mantissa) + assert!(InferType::I8.can_implicit_widen_to(&InferType::F64)); + assert!(InferType::U8.can_implicit_widen_to(&InferType::F64)); + assert!(InferType::I16.can_implicit_widen_to(&InferType::F64)); + 
assert!(InferType::U16.can_implicit_widen_to(&InferType::F64)); + assert!(InferType::I32.can_implicit_widen_to(&InferType::F64)); + assert!(InferType::U32.can_implicit_widen_to(&InferType::F64)); +} + +#[test] +fn rejects_lossy_conversions() { + // Narrowing + assert!(!InferType::I64.can_implicit_widen_to(&InferType::I32)); + assert!(!InferType::I32.can_implicit_widen_to(&InferType::I16)); + assert!(!InferType::I16.can_implicit_widen_to(&InferType::I8)); + + // Signed => unsigned + assert!(!InferType::I8.can_implicit_widen_to(&InferType::U8)); + assert!(!InferType::I8.can_implicit_widen_to(&InferType::U16)); + assert!(!InferType::I32.can_implicit_widen_to(&InferType::U64)); + + // i32/u32 => f32 (lossy: >24-bit mantissa) + assert!(!InferType::I32.can_implicit_widen_to(&InferType::F32)); + assert!(!InferType::U32.can_implicit_widen_to(&InferType::F32)); + + // i64/u64 => f64 (lossy: >53-bit mantissa) + assert!(!InferType::I64.can_implicit_widen_to(&InferType::F64)); + assert!(!InferType::U64.can_implicit_widen_to(&InferType::F64)); + + // Float => int + assert!(!InferType::F32.can_implicit_widen_to(&InferType::I32)); + assert!(!InferType::F64.can_implicit_widen_to(&InferType::I64)); + + // Float => float (f32=>f64 is not int widening, source is float) + assert!(!InferType::F32.can_implicit_widen_to(&InferType::F64)); + + // same type + assert!(!InferType::I64.can_implicit_widen_to(&InferType::I64)); + + // non-numeric + assert!(!InferType::Bool.can_implicit_widen_to(&InferType::I32)); + assert!(!InferType::String.can_implicit_widen_to(&InferType::I64)); +} + +#[test] +fn i8_arg_to_i64_param_inserts_cast() { + let code = r#" +fn takes_i64(x: i64) -> i64 { x } +fn main() { + let v: i8 = 5 + takes_i64(v) +} +"#; + let air = source_to_air(code); + let casts = collect_casts(&air); + assert!( + casts + .iter() + .any(|(from, to)| *from == &AirType::I8 && *to == &AirType::I64), + "expected implicit cast i8=>i64, got: {casts:?}" + ); +} + +#[test] +fn 
u8_arg_to_i32_param_inserts_cast() { + let code = r#" +fn takes_i32(x: i32) -> i32 { x } +fn main() { + let v: u8 = 5 + takes_i32(v) +} +"#; + let air = source_to_air(code); + let casts = collect_casts(&air); + assert!( + casts + .iter() + .any(|(from, to)| *from == &AirType::U8 && *to == &AirType::I32), + "expected implicit cast u8=>i32, got: {casts:?}" + ); +} + +#[test] +fn i32_arg_to_f64_param_inserts_cast() { + let code = r#" +fn takes_f64(x: f64) -> f64 { x } +fn main() { + let v: i32 = 42 + takes_f64(v) +} +"#; + let air = source_to_air(code); + let casts = collect_casts(&air); + assert!( + casts + .iter() + .any(|(from, to)| *from == &AirType::I32 && *to == &AirType::F64), + "expected implicit cast i32=>f64, got: {casts:?}" + ); +} + +#[test] +fn literal_narrowing_still_works() { + // literal 5 should narrow to i8 without needing a cast + let code = r#" +fn takes_i8(x: i8) -> i8 { x } +fn main() { + takes_i8(5) +} +"#; + let air = source_to_air(code); + let casts = collect_casts(&air); + assert!( + casts.is_empty(), + "literal narrowing should not produce a cast, got: {casts:?}" + ); +} diff --git a/aelys/tests/inline_cache_tests.rs b/aelys/tests/inline_cache_tests.rs deleted file mode 100644 index 55c5739..0000000 --- a/aelys/tests/inline_cache_tests.rs +++ /dev/null @@ -1,455 +0,0 @@ -mod common; - -use aelys_bytecode::asm::disassemble; -use aelys_driver::pipeline::{compilation_pipeline, compilation_pipeline_with_opt}; -use aelys_opt::OptimizationLevel; -use aelys_runtime::{VM, stdlib}; -use aelys_syntax::Source; -use common::assert_aelys_int; - -#[test] -fn test_builtin_call_in_loop_uses_cache() { - assert_aelys_int( - r#" - let ptr = alloc(100) - for i in 0..100 { - store(ptr, i, i * 2) - } - load(ptr, 50) - "#, - 100, - ); -} - -#[test] -fn test_recursive_function_with_cache() { - assert_aelys_int( - r#" - fn fib(n: int) -> int { - if n <= 1 { return n } - return fib(n - 1) + fib(n - 2) - } - fib(20) - "#, - 6765, - ); -} - -#[test] -fn 
test_call_global_mono_patches_correctly() { - assert_aelys_int( - r#" - fn increment(x: int) -> int { return x + 1 } - let mut sum = 0 - for i in 0..1000 { - sum = increment(sum) - } - sum - "#, - 1000, - ); -} - -#[test] -fn test_call_global_native_patches_correctly() { - assert_aelys_int( - r#" - let ptr = alloc(1) - let mut val = 0 - for i in 0..1000 { - store(ptr, 0, i) - val = load(ptr, 0) - } - val - "#, - 999, - ); -} - -#[test] -fn test_nested_function_calls_with_cache() { - assert_aelys_int( - r#" - fn add(a: int, b: int) -> int { return a + b } - fn mul(a: int, b: int) -> int { return a * b } - fn compute(x: int) -> int { return add(mul(x, 2), mul(x, 3)) } - compute(10) - "#, - 50, - ); -} - -#[test] -fn test_alternating_function_calls() { - assert_aelys_int( - r#" - fn even_step(x: int) -> int { return x + 2 } - fn odd_step(x: int) -> int { return x + 1 } - let mut val = 0 - for i in 0..100 { - if i % 2 == 0 { - val = even_step(val) - } else { - val = odd_step(val) - } - } - val - "#, - 150, - ); -} - -#[test] -fn test_cache_with_different_arities() { - assert_aelys_int( - r#" - fn zero() -> int { return 0 } - fn one(a: int) -> int { return a } - fn two(a: int, b: int) -> int { return a + b } - fn three(a: int, b: int, c: int) -> int { return a + b + c } - - let mut sum = 0 - for i in 0..100 { - sum += zero() + one(1) + two(1, 2) + three(1, 2, 3) - } - sum - "#, - 1000, - ); -} - -#[test] -fn test_disassembler_skips_cache_words() { - // use O0 to prevent inlining so we can test call opcodes - let mut pipeline = compilation_pipeline_with_opt(OptimizationLevel::None); - - let source = r#" - fn foo() -> int { return 42 } - foo() + foo() - "#; - - let src = Source::new("test", source); - let (func, _heap) = pipeline.compile(src).expect("compile failed"); - let output = disassemble(&func, None); - - let lines: Vec<&str> = output.lines().collect(); - let mut prev_offset: Option = None; - let mut found_gap = false; - - for line in &lines { - let trimmed = 
line.trim(); - if trimmed.starts_with(|c: char| c.is_ascii_digit()) - && let Some(offset_str) = trimmed.split(':').next() - && let Ok(offset) = offset_str.parse::() - { - if let Some(prev) = prev_offset - && offset > prev - && offset - prev == 3 - { - found_gap = true; - } - prev_offset = Some(offset); - } - } - - assert!( - found_gap, - "Disassembler should show gaps of 3 (instruction + 2 cache words). Output:\n{}", - output - ); -} - -#[test] -fn test_module_exports_contains_native_functions() { - let source = Source::new("", ""); - let mut vm = VM::new(source).unwrap(); - - let exports = stdlib::register_std_module(&mut vm, "math").expect("register math failed"); - - assert!( - !exports.native_functions.is_empty(), - "math module should export native functions" - ); - - let has_sqrt = exports.native_functions.iter().any(|n| n.contains("sqrt")); - let has_sin = exports.native_functions.iter().any(|n| n.contains("sin")); - let has_cos = exports.native_functions.iter().any(|n| n.contains("cos")); - - assert!(has_sqrt, "math should export sqrt"); - assert!(has_sin, "math should export sin"); - assert!(has_cos, "math should export cos"); -} - -#[test] -fn test_stdlib_modules_export_native_functions() { - let modules = ["math", "io", "string", "convert", "time"]; - let source = Source::new("", ""); - - for module_name in &modules { - let mut vm = VM::new(source.clone()).unwrap(); - let exports = stdlib::register_std_module(&mut vm, module_name) - .unwrap_or_else(|_| panic!("register {} failed", module_name)); - - assert!( - !exports.native_functions.is_empty(), - "{} module should export native functions, got empty list", - module_name - ); - - for func_name in &exports.native_functions { - assert!( - func_name.starts_with(&format!("{}::", module_name)), - "Function '{}' should be prefixed with '{}::'", - func_name, - module_name - ); - } - } -} - -#[test] -fn test_builtin_functions_work() { - let source = r#" - let p = alloc(8) - store(p, 0, 100) - let v = load(p, 0) 
- free(p) - v - "#; - - assert_aelys_int(source, 100); -} - -#[test] -fn test_heavy_builtin_call_loop() { - assert_aelys_int( - r#" - let ptr = alloc(1) - store(ptr, 0, 0) - for i in 0..10000 { - let v = load(ptr, 0) - store(ptr, 0, v + 1) - } - load(ptr, 0) - "#, - 10000, - ); -} - -#[test] -fn test_interleaved_aelys_and_builtin_calls() { - assert_aelys_int( - r#" - fn increment(x: int) -> int { return x + 1 } - fn decrement(x: int) -> int { return x - 1 } - - let ptr = alloc(1) - store(ptr, 0, 0) - - for i in 0..1000 { - let v = load(ptr, 0) - let v2 = increment(v) - let v3 = increment(v2) - let v4 = decrement(v3) - store(ptr, 0, v4) - } - - load(ptr, 0) - "#, - 1000, - ); -} - -#[test] -fn test_memory_opcodes_in_bytecode() { - let mut pipeline = compilation_pipeline(); - - let source = r#" - let ptr = alloc(10) - store(ptr, 0, 42) - load(ptr, 0) - "#; - - let src = Source::new("test", source); - let (func, _heap) = pipeline.compile(src).expect("compile failed"); - - let mut found_alloc = false; - let mut found_store = false; - let mut found_load = false; - - for &instr in func.bytecode.as_slice() { - let opcode = (instr >> 24) as u8; - match opcode { - 28 => found_alloc = true, // Alloc - 32 | 33 => found_store = true, // StoreMem / StoreMemI - 30 | 31 => found_load = true, // LoadMem / LoadMemI - _ => {} - } - } - - assert!(found_alloc, "Expected Alloc opcode (28) in bytecode"); - assert!( - found_store, - "Expected StoreMem/StoreMemI opcode (32/33) in bytecode" - ); - assert!( - found_load, - "Expected LoadMem/LoadMemI opcode (30/31) in bytecode" - ); -} - -#[test] -fn test_call_global_opcode_for_aelys_functions() { - // use O0 to prevent inlining so we can verify CallGlobal opcodes - let mut pipeline = compilation_pipeline_with_opt(OptimizationLevel::None); - - let source = r#" - fn double(x: int) -> int { return x * 2 } - fn triple(x: int) -> int { return x * 3 } - double(5) + triple(3) - "#; - - let src = Source::new("test", source); - let (func, _heap) = 
pipeline.compile(src).expect("compile failed"); - - let mut found_call_global = 0; - for &instr in func.bytecode.as_slice() { - let opcode = (instr >> 24) as u8; - if opcode == 77 { - found_call_global += 1; - } - } - - assert!( - found_call_global >= 2, - "Expected at least 2 CallGlobal opcodes (double/triple), found {}", - found_call_global - ); -} - -#[test] -fn test_cache_words_present_after_call_opcodes() { - // use O0 to prevent inlining - let mut pipeline = compilation_pipeline_with_opt(OptimizationLevel::None); - - let source = r#" - fn foo() -> int { return 42 } - foo() - "#; - - let src = Source::new("test", source); - let (func, _heap) = pipeline.compile(src).expect("compile failed"); - - let bytecode = func.bytecode.as_slice(); - let mut call_positions = Vec::new(); - for (i, &instr) in bytecode.iter().enumerate() { - let opcode = (instr >> 24) as u8; - if opcode == 77 || opcode == 78 || opcode == 104 { - call_positions.push(i); - } - } - - for pos in call_positions { - assert!( - pos + 2 < bytecode.len(), - "Call opcode at position {} should have 2 cache words following it", - pos - ); - } -} - -#[test] -fn test_aelys_function_repeated_calls_same_result() { - assert_aelys_int( - r#" - fn double(x: int) -> int { return x * 2 } - let a = double(5) - let b = double(5) - let c = double(5) - a + b + c - "#, - 30, - ); -} - -#[test] -fn test_type_builtin_uses_cache() { - assert_aelys_int( - r#" - let t1 = type(42) - let t2 = type(3.14) - let t3 = type("hello") - let t4 = type(true) - 42 - "#, - 42, - ); -} - -#[test] -fn test_mixed_aelys_and_builtin_in_expression() { - assert_aelys_int( - r#" - fn square(x: int) -> int { return x * x } - let ptr = alloc(1) - store(ptr, 0, square(5)) - load(ptr, 0) + square(3) - "#, - 34, - ); -} - -#[test] -fn test_call_in_conditional() { - assert_aelys_int( - r#" - fn is_even(n: int) -> bool { return n % 2 == 0 } - fn double(n: int) -> int { return n * 2 } - fn triple(n: int) -> int { return n * 3 } - - let mut sum = 0 - 
for i in 0..10 { - if is_even(i) { - sum += double(i) - } else { - sum += triple(i) - } - } - sum - "#, - 115, - ); -} - -#[test] -fn test_deeply_nested_calls() { - assert_aelys_int( - r#" - fn a(x: int) -> int { return x + 1 } - fn b(x: int) -> int { return a(x) + 1 } - fn c(x: int) -> int { return b(x) + 1 } - fn d(x: int) -> int { return c(x) + 1 } - fn e(x: int) -> int { return d(x) + 1 } - e(0) - "#, - 5, - ); -} - -#[test] -fn test_function_call_chain() { - assert_aelys_int( - r#" - fn step1(n: int) -> int { - if n <= 0 { return 0 } - return step2(n - 1) + 1 - } - fn step2(n: int) -> int { - if n <= 0 { return 0 } - return step1(n - 1) + 2 - } - step1(5) - "#, - 7, - ); -} diff --git a/aelys/tests/inline_decorator_tests.rs b/aelys/tests/inline_decorator_tests.rs deleted file mode 100644 index f712d9c..0000000 --- a/aelys/tests/inline_decorator_tests.rs +++ /dev/null @@ -1,484 +0,0 @@ -use aelys::run_with_config_and_opt; -use aelys_common::warning::WarningKind; -use aelys_frontend::lexer::Lexer; -use aelys_frontend::parser::Parser; -use aelys_opt::{OptimizationLevel, Optimizer}; -use aelys_runtime::{Value, VmConfig}; -use aelys_sema::TypeInference; -use aelys_syntax::Source; - -fn run_opt(src: &str, level: OptimizationLevel) -> Value { - run_with_config_and_opt(src, "", VmConfig::default(), Vec::new(), level) - .expect("should run") -} - -fn optimize_and_get_warnings(src: &str, level: OptimizationLevel) -> Vec { - let source = Source::new("", src); - let tokens = Lexer::with_source(source.clone()).scan().unwrap(); - let stmts = Parser::new(tokens, source.clone()).parse().unwrap(); - let typed = TypeInference::infer_program(stmts, source).unwrap(); - let mut opt = Optimizer::new(level); - let _ = opt.optimize(typed); - opt.warnings().iter().map(|w| w.kind.clone()).collect() -} - -// basic inline behavior -#[test] -fn inline_simple_function() { - let src = r#" - @inline - fn double(x: int) -> int { x * 2 } - double(5) - "#; - assert_eq!(run_opt(src, 
OptimizationLevel::Standard).as_int(), Some(10)); -} - -#[test] -fn inline_preserves_semantics() { - let src = r#" - @inline - fn add(a: int, b: int) -> int { a + b } - add(3, 4) + add(10, 20) - "#; - let o0 = run_opt(src, OptimizationLevel::None).as_int(); - let o2 = run_opt(src, OptimizationLevel::Standard).as_int(); - assert_eq!(o0, o2); - assert_eq!(o2, Some(37)); -} - -#[test] -fn inline_always_forces_inlining() { - let src = r#" - @inline_always - fn triple(x: int) -> int { x * 3 } - triple(7) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(21)); -} - -#[test] -fn inline_trivial_function_auto_inlines() { - let src = r#" - fn tiny(x: int) -> int { x + 1 } - tiny(99) - "#; - assert_eq!( - run_opt(src, OptimizationLevel::Standard).as_int(), - Some(100) - ); -} - -#[test] -fn inline_single_call_site_auto_inlines() { - let src = r#" - fn helper(x: int) -> int { - let a = x * 2 - let b = a + 10 - b - } - helper(5) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(20)); -} - -// warning tests -#[test] -fn warn_on_recursive_inline() { - let src = r#" - @inline - fn factorial(n: int) -> int { - if n <= 1 { 1 } else { n * factorial(n - 1) } - } - factorial(5) - "#; - let warnings = optimize_and_get_warnings(src, OptimizationLevel::Standard); - assert!( - warnings - .iter() - .any(|k| matches!(k, WarningKind::InlineRecursive)) - ); -} - -#[test] -fn warn_on_mutual_recursion() { - let src = r#" - @inline - fn ping(n: int) -> int { - if n <= 0 { 0 } else { pong(n - 1) } - } - - @inline - fn pong(n: int) -> int { - if n <= 0 { 1 } else { ping(n - 1) } - } - - ping(5) - "#; - let warnings = optimize_and_get_warnings(src, OptimizationLevel::Standard); - assert!( - warnings - .iter() - .any(|k| matches!(k, WarningKind::InlineMutualRecursion { .. 
})) - ); -} - -// nested function @inline is currently a no-op (optimizer only considers top-level functions) -#[test] -fn nested_inline_is_noop() { - let src = r#" - fn outer() -> int { - let captured = 42 - - @inline - fn inner() -> int { captured } - - inner() - } - outer() - "#; - // should still execute correctly, just no inlining happens - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(42)); - // no warning because nested functions aren't analyzed for inlining - let warnings = optimize_and_get_warnings(src, OptimizationLevel::Standard); - assert!(warnings.is_empty()); -} - -// top-level function with closure capture (lambda assigned to variable) -#[test] -fn top_level_closure_with_capture() { - let src = r#" - let base = 100 - - @inline - fn uses_global() -> int { base } - - uses_global() - "#; - // executes correctly even though top-level globals are involved - assert_eq!( - run_opt(src, OptimizationLevel::Standard).as_int(), - Some(100) - ); -} - -#[test] -fn recursive_inline_always_still_warns() { - let src = r#" - @inline_always - fn infinite(n: int) -> int { - infinite(n + 1) - } - 0 - "#; - let warnings = optimize_and_get_warnings(src, OptimizationLevel::Standard); - assert!( - warnings - .iter() - .any(|k| matches!(k, WarningKind::InlineRecursive)) - ); -} - -// multiple functions -#[test] -fn inline_multiple_functions() { - let src = r#" - @inline - fn square(x: int) -> int { x * x } - - @inline - fn cube(x: int) -> int { x * x * x } - - square(3) + cube(2) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(17)); -} - -#[test] -fn inline_chain_calls() { - let src = r#" - @inline - fn inc(x: int) -> int { x + 1 } - - @inline - fn double(x: int) -> int { x * 2 } - - double(inc(5)) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(12)); -} - -// edge cases -#[test] -fn inline_with_no_args() { - let src = r#" - @inline - fn constant() -> int { 42 } - constant() - "#; - 
assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(42)); -} - -#[test] -fn inline_returning_float() { - let src = r#" - @inline - fn half(x: float) -> float { x / 2.0 } - half(10.0) - "#; - assert_eq!( - run_opt(src, OptimizationLevel::Standard).as_float(), - Some(5.0) - ); -} - -#[test] -fn inline_with_bool() { - let src = r#" - @inline - fn negate(b: bool) -> bool { not b } - negate(true) - "#; - assert_eq!( - run_opt(src, OptimizationLevel::Standard).as_bool(), - Some(false) - ); -} - -// O0 should NOT inline -#[test] -fn no_inlining_at_o0() { - let src = r#" - @inline - fn double(x: int) -> int { x * 2 } - double(5) - "#; - let warnings = optimize_and_get_warnings(src, OptimizationLevel::None); - assert!(warnings.is_empty()); -} - -// aggressive mode -#[test] -fn aggressive_mode_inlines_more() { - let src = r#" - fn medium_sized(x: int) -> int { - let a = x + 1 - let b = a * 2 - let c = b - 3 - let d = c / 2 - d - } - medium_sized(10) + medium_sized(20) - "#; - let o2 = run_opt(src, OptimizationLevel::Standard).as_int(); - let o3 = run_opt(src, OptimizationLevel::Aggressive).as_int(); - assert_eq!(o2, o3); -} - -// complex expressions -#[test] -fn inline_binary_expression() { - let src = r#" - @inline - fn add(a: int, b: int) -> int { a + b } - - let result = add(1, 2) * add(3, 4) - result - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(21)); -} - -#[test] -fn inline_in_if_condition() { - let src = r#" - @inline - fn is_positive(x: int) -> bool { x > 0 } - - if is_positive(5) { 100 } else { 0 } - "#; - assert_eq!( - run_opt(src, OptimizationLevel::Standard).as_int(), - Some(100) - ); -} - -#[test] -fn inline_in_loop() { - let src = r#" - @inline - fn increment(x: int) -> int { x + 1 } - - let mut sum = 0 - for i in 0..5 { - sum = increment(sum) - } - sum - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(5)); -} - -// semantics preservation under all opt levels -#[test] -fn 
semantics_preserved_all_levels() { - let src = r#" - @inline - fn calc(a: int, b: int) -> int { - a * b + a - b - } - calc(7, 3) - "#; - let o0 = run_opt(src, OptimizationLevel::None).as_int(); - let o1 = run_opt(src, OptimizationLevel::Basic).as_int(); - let o2 = run_opt(src, OptimizationLevel::Standard).as_int(); - let o3 = run_opt(src, OptimizationLevel::Aggressive).as_int(); - - assert_eq!(o0, o1); - assert_eq!(o1, o2); - assert_eq!(o2, o3); - assert_eq!(o3, Some(25)); // 7*3 + 7 - 3 = 21 + 4 = 25 -} - -// decorator parsing -#[test] -fn decorator_on_function_only() { - let src = r#" - @inline - fn decorated() -> int { 1 } - decorated() - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(1)); -} - -#[test] -fn multiple_decorators() { - // @inline and @inline_always shouldn't conflict, inline_always takes precedence - let src = r#" - @inline_always - fn force_inline(x: int) -> int { x + 1 } - force_inline(10) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(11)); -} - -// indirect recursion through 3+ functions -#[test] -fn warn_on_triple_mutual_recursion() { - let src = r#" - @inline - fn a(n: int) -> int { if n <= 0 { 0 } else { b(n - 1) } } - - @inline - fn b(n: int) -> int { if n <= 0 { 1 } else { c(n - 1) } } - - @inline - fn c(n: int) -> int { if n <= 0 { 2 } else { a(n - 1) } } - - a(3) - "#; - let warnings = optimize_and_get_warnings(src, OptimizationLevel::Standard); - assert!( - warnings - .iter() - .any(|k| matches!(k, WarningKind::InlineMutualRecursion { .. 
})) - ); -} - -// function with side effects (still inlines, just not pure) -#[test] -fn inline_function_with_side_effects() { - let src = r#" - let mut counter = 0 - - @inline - fn inc_and_get() -> int { - counter = counter + 1 - counter - } - - let a = inc_and_get() - let b = inc_and_get() - a + b - "#; - // even with side effects, behavior should be consistent - let o0 = run_opt(src, OptimizationLevel::None).as_int(); - let o2 = run_opt(src, OptimizationLevel::Standard).as_int(); - assert_eq!(o0, o2); -} - -// regression: inline with string type -#[test] -fn inline_string_return() { - let src = r#" - @inline - fn greet(name: string) -> string { "Hello, " + name } - greet("World") - "#; - run_opt(src, OptimizationLevel::Standard); // just check no crash -} - -// inline function called with expressions as arguments -#[test] -fn inline_with_complex_args() { - let src = r#" - @inline - fn add(a: int, b: int) -> int { a + b } - - fn other() -> int { 5 } - - add(other() * 2, 3 + 4) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(17)); -} - -// public function shouldn't have special treatment in optimizer (that's compiler concern) -#[test] -fn inline_public_function() { - let src = r#" - @inline - pub fn public_fn(x: int) -> int { x * 2 } - public_fn(7) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(14)); -} - -// multiple decorators -#[test] -fn inline_combined_with_no_gc() { - let src = r#" - @no_gc - @inline - fn fast_add(a: int, b: int) -> int { a + b } - fast_add(10, 20) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(30)); -} - -#[test] -fn inline_always_combined_with_no_gc() { - let src = r#" - @inline_always - @no_gc - fn fast_mul(a: int, b: int) -> int { a * b } - fast_mul(7, 8) - "#; - assert_eq!(run_opt(src, OptimizationLevel::Standard).as_int(), Some(56)); -} - -// stats: check inliner actually counts something -#[test] -fn optimizer_stats_track_inlining() { - let src = r#" 
- @inline - fn double(x: int) -> int { x * 2 } - double(1) + double(2) + double(3) - "#; - let source = Source::new("", src); - let tokens = Lexer::with_source(source.clone()).scan().unwrap(); - let stmts = Parser::new(tokens, source.clone()).parse().unwrap(); - let typed = TypeInference::infer_program(stmts, source).unwrap(); - let mut opt = Optimizer::new(OptimizationLevel::Standard); - let _ = opt.optimize(typed); - // inliner should report some activity (stats might be > 0) - let stats = opt.stats(); - // just verify stats are accessible - let _ = stats.functions_inlined; -} diff --git a/aelys/tests/integration_test.rs b/aelys/tests/integration_test.rs deleted file mode 100644 index ec84c13..0000000 --- a/aelys/tests/integration_test.rs +++ /dev/null @@ -1,536 +0,0 @@ -/// Integration tests for the Aelys language. -/// -/// These tests verify end-to-end execution of Aelys programs, -/// running the full pipeline: Lexer -> Parser -> Compiler -> VM. -use aelys::run; -use aelys_runtime::Value; - -/// Helper to run code and expect success -fn run_ok(source: &str) -> Value { - run(source, "test.aelys").expect("Expected program to run successfully") -} - -/// Helper to run code and expect an error -fn run_err(source: &str) -> String { - match run(source, "test.aelys") { - Ok(_) => panic!("Expected program to fail, but it succeeded"), - Err(e) => format!("{}", e), - } -} - -#[test] -fn test_arithmetic() { - // Simple arithmetic - let result = run_ok("1 + 2;"); - assert_eq!(result.as_int(), Some(3)); - - let result = run_ok("10 - 3;"); - assert_eq!(result.as_int(), Some(7)); - - let result = run_ok("4 * 5;"); - assert_eq!(result.as_int(), Some(20)); - - let result = run_ok("20 / 4;"); - assert_eq!(result.as_int(), Some(5)); - - let result = run_ok("17 % 5;"); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_complex_arithmetic() { - let result = run_ok("(1 + 2) * 3;"); - assert_eq!(result.as_int(), Some(9)); - - let result = run_ok("10 - 2 * 3;"); - 
assert_eq!(result.as_int(), Some(4)); - - let result = run_ok("(10 - 2) * 3;"); - assert_eq!(result.as_int(), Some(24)); -} - -#[test] -fn test_variables() { - let result = run_ok("let x = 42; x;"); - assert_eq!(result.as_int(), Some(42)); - - let result = run_ok("let x = 10; let y = 20; x + y;"); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_mutable_variables() { - let result = run_ok("let mut x = 10; x = 20; x;"); - assert_eq!(result.as_int(), Some(20)); - - let result = run_ok("let mut count = 0; count++; count;"); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_comparisons() { - let result = run_ok("1 < 2;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("2 < 1;"); - assert_eq!(result.as_bool(), Some(false)); - - let result = run_ok("1 <= 1;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("2 > 1;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("1 == 1;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("1 != 2;"); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_logical_operators() { - let result = run_ok("true and true;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("true and false;"); - assert_eq!(result.as_bool(), Some(false)); - - let result = run_ok("false or true;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("false or false;"); - assert_eq!(result.as_bool(), Some(false)); - - let result = run_ok("not true;"); - assert_eq!(result.as_bool(), Some(false)); - - let result = run_ok("not false;"); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_if_statement() { - // if with else at top level returns the value of the taken branch - let result = run_ok("if true { 42 } else { 0 }"); - assert_eq!(result.as_int(), Some(42)); - - let result = run_ok("let x = if true { 42 } else { 0 }; x;"); - assert_eq!(result.as_int(), Some(42)); - - let result = run_ok("let x = 
if false { 42 } else { 100 }; x;"); - assert_eq!(result.as_int(), Some(100)); -} - -#[test] -fn test_while_loop() { - let result = run_ok( - r#" - let mut i = 0; - let mut sum = 0; - while i < 5 { - sum += i; - i++; - } - sum; - "#, - ); - assert_eq!(result.as_int(), Some(10)); // 0 + 1 + 2 + 3 + 4 -} - -#[test] -fn test_factorial_iterative() { - let result = run_ok( - r#" - let mut n = 5; - let mut result = 1; - while n > 0 { - result *= n; - n--; - } - result; - "#, - ); - assert_eq!(result.as_int(), Some(120)); // 5! = 120 -} - -#[test] -fn test_fibonacci_recursive() { - let result = run_ok( - r#" - fn fib(n) { - if n < 2 { - return n; - } - return fib(n - 1) + fib(n - 2); - } - fib(10); - "#, - ); - assert_eq!(result.as_int(), Some(55)); // fib(10) = 55 -} - -#[test] -fn test_factorial_recursive() { - let result = run_ok( - r#" - fn factorial(n) { - if n <= 1 { - return 1; - } - return n * factorial(n - 1); - } - factorial(5); - "#, - ); - assert_eq!(result.as_int(), Some(120)); // 5! 
= 120 -} - -#[test] -fn test_fizzbuzz() { - // FizzBuzz for numbers 1-15 - // We'll test by capturing the logic in variables - let result = run_ok( - r#" - let mut i = 1; - let mut count = 0; - while i <= 15 { - if i % 15 == 0 { - count++; - } else { - if i % 3 == 0 { - count++; - } else { - if i % 5 == 0 { - count++; - } - } - } - i++; - } - count; - "#, - ); - assert_eq!(result.as_int(), Some(7)); // Numbers divisible by 3 or 5: 3, 5, 6, 9, 10, 12, 15 -} - -#[test] -fn test_higher_order_functions() { - let result = run_ok( - r#" - fn apply_twice(f, x) { - return f(f(x)); - } - fn increment(n) { - return n + 1; - } - apply_twice(increment, 10); - "#, - ); - assert_eq!(result.as_int(), Some(12)); // increment(increment(10)) = 12 -} - -#[test] -fn test_function_with_multiple_params() { - let result = run_ok( - r#" - fn add(a, b) { - return a + b; - } - fn multiply(a, b) { - return a * b; - } - multiply(add(2, 3), 4); - "#, - ); - assert_eq!(result.as_int(), Some(20)); // (2 + 3) * 4 = 20 -} - -#[test] -fn test_variable_scoping() { - // Test block scoping - inner x shadows outer x - let result = run_ok( - r#" - let x = 1; - { - let x = 2; - let y = x + 1; - } - x; - "#, - ); - assert_eq!(result.as_int(), Some(1)); // Outer x is unchanged - - // Test nested block scoping - let result = run_ok( - r#" - let x = 5; - { - let x = 10; - { - let x = 15; - } - } - x; - "#, - ); - assert_eq!(result.as_int(), Some(5)); // Outer x is still 5 -} - -#[test] -fn test_break_statement() { - let result = run_ok( - r#" - let mut i = 0; - let mut sum = 0; - while i < 100 { - if i == 5 { - break; - } - sum += i; - i++; - } - sum; - "#, - ); - assert_eq!(result.as_int(), Some(10)); // 0 + 1 + 2 + 3 + 4 -} - -#[test] -fn test_continue_statement() { - let result = run_ok( - r#" - let mut i = 0; - let mut sum = 0; - while i < 5 { - i++; - if i == 3 { - continue; - } - sum += i; - } - sum; - "#, - ); - assert_eq!(result.as_int(), Some(12)); // 1 + 2 + 4 + 5 (skips 3) -} - -#[test] -fn 
test_nested_blocks() { - let result = run_ok( - r#" - let x = 1; - { - let y = 2; - { - let z = 3; - x + y + z; - } - } - x; - "#, - ); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_error_undefined_variable() { - let error = run_err("x;"); - assert!(error.contains("undefined variable")); -} - -#[test] -fn test_error_assign_to_immutable() { - let error = run_err("let x = 1; x = 2;"); - assert!(error.contains("immutable") || error.contains("not mutable")); -} - -#[test] -fn test_error_assign_to_immutable_param() { - let error = run_err("fn f(x: int) -> int { x++; x } f(1);"); - assert!(error.contains("immutable") || error.contains("not mutable")); -} - -#[test] -fn test_mut_param_reassign() { - let result = run_ok("fn f(mut x: int) -> int { x++; x } f(10);"); - assert_eq!(result.as_int(), Some(11)); -} - -#[test] -fn test_mut_param_in_loop() { - let result = run_ok( - "fn accumulate(mut acc: int, n: int) -> int { \ - for i in 0..n { acc++ } \ - return acc } \ - accumulate(10, 5);", - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_mut_param_does_not_affect_caller() { - let result = run_ok( - "fn inc(mut x: int) -> int { x += 100; x } \ - let a = 1; let b = inc(a); a;", - ); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_error_break_outside_loop() { - let error = run_err("break;"); - assert!(error.contains("break") && error.contains("loop")); -} - -#[test] -fn test_error_continue_outside_loop() { - let error = run_err("continue;"); - assert!(error.contains("continue") && error.contains("loop")); -} - -#[test] -fn test_error_division_by_zero() { - let error = run_err("1 / 0;"); - assert!(error.contains("division by zero")); -} - -#[test] -fn test_error_arity_mismatch() { - let error = run_err( - r#" - fn add(a, b) { - return a + b; - } - add(1); - "#, - ); - assert!(error.contains("arity") || error.contains("arguments")); -} - -#[test] -fn test_empty_program() { - let result = run_ok(""); - assert!(result.is_null()); 
-} - -#[test] -fn test_expression_only() { - let result = run_ok("42;"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_boolean_literals() { - let result = run_ok("true;"); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_ok("false;"); - assert_eq!(result.as_bool(), Some(false)); -} - -#[test] -fn test_null_literal() { - let result = run_ok("null;"); - assert!(result.is_null()); -} - -#[test] -fn test_negation() { - let result = run_ok("-42;"); - assert_eq!(result.as_int(), Some(-42)); - - let result = run_ok("-(10 - 5);"); - assert_eq!(result.as_int(), Some(-5)); -} - -#[test] -fn test_multiple_statements() { - let result = run_ok( - r#" - let a = 1; - let b = 2; - let c = 3; - a + b + c; - "#, - ); - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_return_from_main() { - // The last expression/statement determines the program's return value - let result = run_ok("42;"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_function_returns_early() { - let result = run_ok( - r#" - fn test(x) { - if x < 0 { - return 0; - } - return x * 2; - } - test(-5); - "#, - ); - assert_eq!(result.as_int(), Some(0)); - - let result = run_ok( - r#" - fn test(x) { - if x < 0 { - return 0; - } - return x * 2; - } - test(5); - "#, - ); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_nested_function_calls() { - let result = run_ok( - r#" - fn double(x) { - return x * 2; - } - fn quadruple(x) { - return double(double(x)); - } - quadruple(5); - "#, - ); - assert_eq!(result.as_int(), Some(20)); -} - -#[test] -fn test_function_with_no_params() { - let result = run_ok( - r#" - fn get_answer() { - return 42; - } - get_answer(); - "#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_function_no_explicit_return() { - let result = run_ok( - r#" - fn implicit_return() { - let x = 10; - } - implicit_return(); - "#, - ); - assert!(result.is_null()); -} diff --git a/aelys/tests/integration_tests.rs 
b/aelys/tests/integration_tests.rs deleted file mode 100644 index eeb30de..0000000 --- a/aelys/tests/integration_tests.rs +++ /dev/null @@ -1,742 +0,0 @@ -//! Integration tests for Aelys - converted from examples/test_*.aelys - -mod common; -use common::*; - -// ==Simple function tests ===== - -#[test] -fn test_simple_function_return() { - let result = run_aelys( - r#" -fn get_answer() { - return 42 -} -get_answer() -"#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_function_with_params() { - let result = run_aelys( - r#" -fn add(a, b) { - return a + b -} -add(10, 20) -"#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -// ==If expression tests ===== - -#[test] -fn test_if_expression_simple() { - let result = run_aelys( - r#" -let x = if true { 1 } else { 2 } -x -"#, - ); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_if_expression_false() { - let result = run_aelys( - r#" -let x = if false { 1 } else { 2 } -x -"#, - ); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_if_expression_with_return() { - let result = run_aelys( - r#" -fn test_return(n) { - return if n > 0 { n } else { 0 } -} -test_return(5) -"#, - ); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_if_expression_implicit_return() { - let result = run_aelys( - r#" -fn implicit_return(n) { - if n > 0 { n } else { 0 } -} -implicit_return(5) -"#, - ); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_nested_if_expression() { - let result = run_aelys( - r#" -fn nested(a, b) { - if a > 0 { - if b > 0 { a + b } else { a } - } else { - if b > 0 { b } else { 0 } - } -} -nested(1, 2) -"#, - ); - assert_eq!(result.as_int(), Some(3)); -} - -// ==Large integer tests ===== - -#[test] -fn test_large_integers() { - // Integers exceeding 48-bit range should produce an error - let err = run_aelys_err( - r#" -let a = 9007199254740000 -a -"#, - ); - assert!(err.contains("integer") && err.contains("exceeds") && err.contains("range")); -} - 
-#[test] -fn test_negative_large_integer() { - // Negative integers exceeding 48-bit range should produce an error - let err = run_aelys_err( - r#" -let c = -9007199254740991 -c -"#, - ); - assert!(err.contains("integer") && err.contains("exceeds") && err.contains("range")); -} - -#[test] -fn test_power_of_two_30() { - let result = run_aelys("1073741824"); - assert_eq!(result.as_int(), Some(1073741824)); -} - -#[test] -fn test_power_of_two_40() { - let result = run_aelys("1099511627776"); - assert_eq!(result.as_int(), Some(1099511627776)); -} - -// ==Recursion tests ===== - -#[test] -fn test_recursion_factorial() { - let result = run_aelys( - r#" -fn factorial(n) { - if n <= 1 { - return 1 - } - return n * factorial(n - 1) -} -factorial(10) -"#, - ); - assert_eq!(result.as_int(), Some(3628800)); -} - -#[test] -fn test_recursion_fibonacci() { - let result = run_aelys( - r#" -fn fib(n) { - if n <= 1 { - return n - } - return fib(n - 1) + fib(n - 2) -} -fib(10) -"#, - ); - assert_eq!(result.as_int(), Some(55)); -} - -#[test] -fn test_deep_recursion_500() { - let result = run_aelys( - r#" -fn deep(n) { - if n > 0 { - return deep(n - 1) - } - return n -} -deep(500) -"#, - ); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_deep_recursion_1020() { - let result = run_aelys( - r#" -fn deep(n) { - if n > 0 { - return deep(n - 1) - } - return n -} -deep(1020) -"#, - ); - assert_eq!(result.as_int(), Some(0)); -} - -// ==Mutual recursion ===== - -#[test] -fn test_mutual_recursion_is_even() { - let result = run_aelys( - r#" -fn is_even(n) { - if n == 0 { - return true - } - return is_odd(n - 1) -} - -fn is_odd(n) { - if n == 0 { - return false - } - return is_even(n - 1) -} - -is_even(10) -"#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_mutual_recursion_is_odd() { - let result = run_aelys( - r#" -fn is_even(n) { - if n == 0 { - return true - } - return is_odd(n - 1) -} - -fn is_odd(n) { - if n == 0 { - return false - } - return 
is_even(n - 1) -} - -is_odd(99) -"#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -// ==Division tests ===== - -#[test] -fn test_integer_division() { - let result = run_aelys("10 / 3"); - assert_eq!(result.as_int(), Some(3)); -} - -#[test] -fn test_exact_division() { - let result = run_aelys("10 / 2"); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_negative_dividend() { - let result = run_aelys("-10 / 3"); - assert_eq!(result.as_int(), Some(-3)); -} - -#[test] -fn test_negative_divisor() { - let result = run_aelys("10 / -3"); - assert_eq!(result.as_int(), Some(-3)); -} - -#[test] -fn test_both_negative() { - let result = run_aelys("-10 / -3"); - assert_eq!(result.as_int(), Some(3)); -} - -#[test] -fn test_float_division() { - let result = run_aelys("10.0 / 3.0"); - let f = result.as_float().unwrap(); - assert!((f - 3.3333333333333335).abs() < 0.0001); -} - -// ==Modulo tests ===== - -#[test] -fn test_modulo() { - let result = run_aelys("17 % 5"); - assert_eq!(result.as_int(), Some(2)); -} - -// ==Error tests ===== - -#[test] -fn test_division_by_zero() { - let result = aelys::run("10 / 0", ""); - assert!(result.is_err()); -} - -#[test] -fn test_modulo_by_zero() { - let result = aelys::run("10 % 0", ""); - assert!(result.is_err()); -} - -// ==Logic tests ===== - -#[test] -fn test_and_short_circuit() { - // false and X should return false without evaluating X - let result = run_aelys( - r#" -let r = false and true -r -"#, - ); - assert_eq!(result.as_bool(), Some(false)); -} - -#[test] -fn test_or_short_circuit() { - // true or X should return true without evaluating X - let result = run_aelys( - r#" -let r = true or false -r -"#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_complex_boolean() { - let result = run_aelys("true and false or true"); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_grouped_boolean() { - let result = run_aelys("(true or false) and (false or true)"); - 
assert_eq!(result.as_bool(), Some(true)); -} - -// ==Control flow tests ===== - -#[test] -fn test_while_loop() { - let result = run_aelys( - r#" -let mut i = 0 -while i < 10 { - i++ -} -i -"#, - ); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_while_with_break() { - let result = run_aelys( - r#" -let mut i = 0 -while true { - i++ - if i >= 5 { - break - } -} -i -"#, - ); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_while_with_continue() { - let result = run_aelys( - r#" -let mut i = 0 -let mut sum = 0 -while i < 10 { - i++ - if i % 2 == 0 { - continue - } - sum += i -} -sum -"#, - ); - // sum of odd numbers 1,3,5,7,9 = 25 - assert_eq!(result.as_int(), Some(25)); -} - -// ==Variable shadowing tests ===== - -#[test] -fn test_variable_shadowing_simple() { - // Blocks don't return implicit values in Aelys - // The result is null from the block statement - let result = run_aelys( - r#" -let x = 1 -{ - let x = 2 -} -x -"#, - ); - // After the block, x is back to outer scope value - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_variable_shadowing_outer_restored() { - let result = run_aelys( - r#" -let x = 1 -{ - let x = 2 -} -x -"#, - ); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_deep_shadowing() { - // Test that deeply nested shadowing works correctly - let result = run_aelys( - r#" -fn get_inner() { - let y = 0 - { - let y = 1 - { - let y = 2 - { - let y = 3 - { - let y = 4 - { - let y = 5 - return y - } - } - } - } - } -} -get_inner() -"#, - ); - assert_eq!(result.as_int(), Some(5)); -} - -// ==GC stress tests ===== - -#[test] -fn test_gc_stress_loop() { - // This should complete without running out of memory - let result = run_aelys( - r#" -fn gc_stress() { - let mut i = 0 - while i < 10000 { - let temp = "garbage string that should be collected" - i++ - } - i -} -gc_stress() -"#, - ); - assert_eq!(result.as_int(), Some(10000)); -} - -#[test] -fn test_gc_stress_concatenation() { - // String 
concatenation stress test - let result = run_aelys( - r#" -fn gc_stress2() { - let mut i = 0 - while i < 1000 { - let a = "string a" - let b = "string b" - let c = a + b - i++ - } - i -} -gc_stress2() -"#, - ); - assert_eq!(result.as_int(), Some(1000)); -} - -// ==Edge case tests ===== - -#[test] -fn test_negative_zero() { - let result = run_aelys("-0"); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_double_negation() { - let result = run_aelys("--42"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_not_not() { - let result = run_aelys("not not true"); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_comparison_chain() { - // a < b should return bool, not chainable like Python - let result = run_aelys("1 < 2"); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_equality_types() { - // Different types should not be equal - let result = run_aelys("42 == true"); - assert_eq!(result.as_bool(), Some(false)); -} - -// ==@no_gc tests ===== - -#[test] -fn test_no_gc_decorator() { - let result = run_aelys( - r#" -@no_gc -fn critical_section(n) { - let mut sum = 0 - let mut i = 0 - while i < n { - sum += i - i++ - } - return sum -} - -critical_section(100) -"#, - ); - // sum of 0..99 = 99*100/2 = 4950 - assert_eq!(result.as_int(), Some(4950)); -} - -#[test] -fn test_string_concatenation() { - let result = run_aelys( - r#" -let a = "Hello" -let b = " World" -a + b -"#, - ); - // Since we can't easily check string content, just verify it runs without error - assert!(result.is_ptr()); -} - -#[test] -fn test_precedence_mul_add() { - let result = run_aelys("2 + 3 * 4"); - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_precedence_grouped() { - let result = run_aelys("(2 + 3) * 4"); - assert_eq!(result.as_int(), Some(20)); -} - -#[test] -fn test_complex_expression() { - let result = run_aelys("1 + 2 * 3 - 4 / 2"); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_max_int_value() { - // 
Test actual 48-bit max value: (1 << 47) - 1 = 140737488355327 - let result = run_aelys("140737488355327"); - assert_eq!(result.as_int(), Some(140737488355327)); - - // Values exceeding 48-bit range should error - let err = run_aelys_err("140737488355328"); - assert!(err.contains("integer") && err.contains("exceeds") && err.contains("range")); -} - -#[test] -fn test_min_int_value() { - let result = run_aelys("-140737488355327"); - assert_eq!(result.as_int(), Some(-140737488355327)); - - let result = run_aelys("-140737488355328"); - assert_eq!(result.as_int(), Some(-140737488355328)); - - let err = run_aelys_err("-140737488355329"); - assert!(err.contains("integer") && err.contains("exceeds") && err.contains("range")); -} - -#[test] -fn test_block_as_statement() { - // Blocks are statements in Aelys, not expressions - // They don't return values - use functions for that - let result = run_aelys( - r#" -fn compute() { - let a = 10 - let b = 20 - a + b -} -compute() -"#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_many_variables() { - let result = run_aelys( - r#" -let a1 = 1 -let a2 = 2 -let a3 = 3 -let a4 = 4 -let a5 = 5 -let a6 = 6 -let a7 = 7 -let a8 = 8 -let a9 = 9 -let a10 = 10 -a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 -"#, - ); - assert_eq!(result.as_int(), Some(55)); -} - -#[test] -fn test_nested_function_calls() { - let result = run_aelys( - r#" -fn add(a, b) { a + b } -fn mul(a, b) { a * b } -fn combined(a, b, c) { - add(mul(a, b), c) -} -combined(2, 3, 4) -"#, - ); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_empty_function() { - let result = run_aelys( - r#" -fn empty() { } -empty() -"#, - ); - assert!(result.is_null()); -} - -#[test] -fn test_early_return() { - let result = run_aelys( - r#" -fn early(n) { - if n < 0 { - return -1 - } - if n == 0 { - return 0 - } - return 1 -} -early(-5) -"#, - ); - assert_eq!(result.as_int(), Some(-1)); -} - -#[test] -fn test_early_return_zero() { - let result = 
run_aelys( - r#" -fn early(n) { - if n < 0 { - return -1 - } - if n == 0 { - return 0 - } - return 1 -} -early(0) -"#, - ); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn test_early_return_positive() { - let result = run_aelys( - r#" -fn early(n) { - if n < 0 { - return -1 - } - if n == 0 { - return 0 - } - return 1 -} -early(5) -"#, - ); - assert_eq!(result.as_int(), Some(1)); -} diff --git a/aelys/tests/llvm_air_abi_tests.rs b/aelys/tests/llvm_air_abi_tests.rs new file mode 100644 index 0000000..c78d715 --- /dev/null +++ b/aelys/tests/llvm_air_abi_tests.rs @@ -0,0 +1,373 @@ +use aelys_air::layout::layout_of; +use aelys_air::{ + AirBlock, AirConst, AirFunction, AirIntSize, AirLocal, AirParam, AirProgram, AirStmt, + AirStmtKind, AirTerminator, AirType, BlockId, CallingConv, FunctionAttribs, FunctionId, GcMode, + InlineHint, LocalId, Operand, Place, Rvalue, +}; +use aelys_codegen::CodegenContext; +use aelys_codegen::types::alignment_of; +use inkwell::OptimizationLevel; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use inkwell::targets::{CodeModel, InitializationConfig, RelocMode, Target, TargetMachine}; +use inkwell::types::BasicTypeEnum; +use std::fs; +use tempfile::tempdir; + +fn compile_air_to_verified_ir(program: &AirProgram) -> String { + let dir = tempdir().expect("tempdir should be created"); + let ll_path = dir.path().join("module.ll"); + let ll_path_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("abi_hardening"); + codegen + .compile(program) + .expect("codegen compilation should succeed"); + codegen + .emit_ir(&ll_path_str) + .expect("llvm ir should be emitted"); + + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a 
module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + + ir +} + +#[test] +fn llvm_panic_uses_ptr_len_signature() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "panic_probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Panic { + message: "X".to_string(), + span: None, + }, + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + assert!(ir.contains("declare void @__aelys_panic(ptr, i64)"), "{ir}"); + assert!(ir.contains("call void @__aelys_panic(ptr"), "{ir}"); + assert!(!ir.contains("declare void @__aelys_panic(ptr)"), "{ir}"); + assert!( + ir.contains("noreturn"), + "panic declaration should have noreturn attribute: {ir}" + ); +} + +#[test] +fn air_and_llvm_string_layout_match_x86_64_abi() { + let air_layout = layout_of(&AirType::Str); + assert_eq!(air_layout.size, 16); + assert_eq!(air_layout.align, 8); + + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "sink".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![aelys_air::AirParam { + id: aelys_air::LocalId(0), + ty: AirType::Str, + name: "s".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(Some(Operand::Const(AirConst::Int( + 0, + AirIntSize::I64, + )))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + 
attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + assert!(ir.contains("%__aelys_string = type { ptr, i64 }"), "{ir}"); + // Aelys-convention functions prepend an implicit env ptr; the string follows. + let sink_decl = ir + .lines() + .find(|l| l.contains("define fastcc i64 @sink")) + .expect("sink function must be defined"); + assert!( + sink_decl.contains("%__aelys_string"), + "string param must lower to %__aelys_string struct, not a bare ptr:\n{sink_decl}" + ); + assert!( + !sink_decl.contains("ptr, i64"), + "string must not be flattened to (ptr, i64) scalars:\n{sink_decl}" + ); + + let context = Context::create(); + let mut nul_terminated_ir = ir.into_bytes(); + nul_terminated_ir.push(0); + let buffer = MemoryBuffer::create_from_memory_range_copy(&nul_terminated_ir, "module.ll"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + + let str_ty = module + .get_struct_type("__aelys_string") + .expect("string struct should exist"); + assert!(!str_ty.is_packed(), "string struct must be non-packed"); + + let fields = str_ty.get_field_types(); + assert_eq!( + fields.len(), + 2, + "string struct must have exactly two fields" + ); + assert!( + matches!(fields[0], BasicTypeEnum::PointerType(_)), + "field #0 must be ptr" + ); + match fields[1] { + BasicTypeEnum::IntType(int_ty) => assert_eq!(int_ty.get_bit_width(), 64), + _ => panic!("field #1 must be i64"), + } + + Target::initialize_native(&InitializationConfig::default()) + .expect("native target initialization should succeed"); + let triple = TargetMachine::get_default_triple(); + let target = Target::from_triple(&triple).expect("target triple should be supported"); + 
let cpu = TargetMachine::get_host_cpu_name().to_string(); + let features = TargetMachine::get_host_cpu_features().to_string(); + let target_machine = target + .create_target_machine( + &triple, + &cpu, + &features, + OptimizationLevel::None, + RelocMode::Default, + CodeModel::Default, + ) + .expect("target machine should be created"); + let target_data = target_machine.get_target_data(); + + let llvm_align = alignment_of(str_ty.into()); + let llvm_align_abi = target_data.get_abi_alignment(&str_ty); + let llvm_size = target_data.get_abi_size(&str_ty); + + assert_eq!(llvm_align, 8); + assert_eq!(llvm_align_abi, 8); + assert_eq!(llvm_size, 16); + assert_eq!(llvm_align, air_layout.align); + assert_eq!( + u32::try_from(llvm_size).expect("size should fit u32"), + air_layout.size + ); +} + +#[test] +fn ssa_params_use_llvm_args_directly() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "echo".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::Str, + name: "s".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![AirLocal { + id: LocalId(1), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::FieldAccess { + base: Operand::Copy(LocalId(0)), + field: "len".to_string(), + }, + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(1)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + 
assert!( + !ir.contains("alloca"), + "SSA params/locals must not generate alloca:\n{ir}" + ); + assert!( + !ir.contains("store"), + "SSA params/locals must not generate store:\n{ir}" + ); + assert!( + ir.contains("extractvalue"), + "field access on SSA str should use extractvalue:\n{ir}" + ); +} + +#[test] +fn mutable_locals_use_alloca_with_correct_alignment() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "mut_probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![ + AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "x".to_string(), + span: None, + }, + AirParam { + id: LocalId(1), + ty: AirType::I64, + name: "y".to_string(), + span: None, + }, + ], + ret_ty: AirType::I64, + locals: vec![AirLocal { + id: LocalId(2), + ty: AirType::I64, + name: Some("a".to_string()), + is_mut: true, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::Use(Operand::Copy(LocalId(0))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::Use(Operand::Copy(LocalId(1))), + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(2)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + assert!( + ir.contains("alloca i64"), + "mutable local must use alloca:\n{ir}" + ); + for line in ir.lines().filter(|l| l.contains("alloca i64")) { + assert!( + line.contains("align 8"), + "i64 alloca must have align 8: {line}" + ); + } + for line in 
ir.lines().filter(|l| l.contains("store i64 ")) { + assert!( + line.contains("align 8"), + "i64 store must have align 8: {line}" + ); + } + for line in ir.lines().filter(|l| l.contains("load i64,")) { + assert!( + line.contains("align 8"), + "i64 load must have align 8: {line}" + ); + } +} diff --git a/aelys/tests/llvm_air_index_tests.rs b/aelys/tests/llvm_air_index_tests.rs new file mode 100644 index 0000000..53ed6b2 --- /dev/null +++ b/aelys/tests/llvm_air_index_tests.rs @@ -0,0 +1,431 @@ +use aelys_air::{ + AirBlock, AirConst, AirFunction, AirLocal, AirParam, AirProgram, AirStmt, AirStmtKind, + AirTerminator, AirType, BlockId, CallingConv, FunctionAttribs, FunctionId, GcMode, InlineHint, + LocalId, Operand, Place, Rvalue, +}; +use aelys_codegen::CodegenContext; +use std::fs; +use tempfile::tempdir; + +fn compile_air_to_verified_ir(program: &AirProgram) -> String { + let dir = tempdir().expect("tempdir should be created"); + let ll_path = dir.path().join("module.ll"); + let ll_path_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("index_tests"); + codegen + .compile(program) + .expect("codegen compilation should succeed"); + codegen + .emit_ir(&ll_path_str) + .expect("llvm ir should be emitted"); + + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = inkwell::context::Context::create(); + let buffer = + inkwell::memory_buffer::MemoryBuffer::create_from_file(&ll_path).expect("ir readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + + ir +} + +fn default_attribs() -> FunctionAttribs { + FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + } +} + +/// Array read: Rvalue::Index on Array(I64, 4), verify GEP + bounds check + load +#[test] +fn 
array_index_read_generates_gep_and_bounds_check() { + // fn probe(idx: i64) -> i64 { + // let arr: [i64; 4] = zeroinit + // return arr[idx] + // } + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "idx".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(1), + ty: AirType::Array(Box::new(AirType::I64), 4), + name: Some("arr".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Use(Operand::Const(AirConst::ZeroInit( + AirType::Array(Box::new(AirType::I64), 4), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::Index { + base: Operand::Copy(LocalId(1)), + index: Operand::Copy(LocalId(0)), + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(2)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // should have GEP into the array + assert!( + ir.contains("getelementptr inbounds"), + "array index should generate GEP:\n{ir}" + ); + // should have bounds check + assert!( + ir.contains("icmp uge"), + "array index should generate unsigned bounds check:\n{ir}" + ); + // should call __aelys_panic for OOB + assert!( + ir.contains("@__aelys_panic"), + "array index should call __aelys_panic on OOB:\n{ir}" 
+ ); + // should have idx_oob and idx_ok labels + assert!(ir.contains("idx_oob:"), "should have idx_oob block:\n{ir}"); + assert!(ir.contains("idx_ok:"), "should have idx_ok block:\n{ir}"); + // should have unreachable after panic + assert!( + ir.contains("unreachable"), + "should have unreachable after panic:\n{ir}" + ); + // should load the element + assert!( + ir.contains("load i64"), + "array index should load element:\n{ir}" + ); +} + +/// String index: Rvalue::Index on Str delegates to __aelys_str_char_at (UTF-8 char indexing) +#[test] +fn string_index_read_calls_runtime_char_at() { + // fn probe(s: str, idx: i64) -> str { + // return s[idx] + // } + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![ + AirParam { + id: LocalId(0), + ty: AirType::Str, + name: "s".to_string(), + span: None, + }, + AirParam { + id: LocalId(1), + ty: AirType::I64, + name: "idx".to_string(), + span: None, + }, + ], + ret_ty: AirType::Str, + locals: vec![AirLocal { + id: LocalId(2), + ty: AirType::Str, + name: None, + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::Index { + base: Operand::Copy(LocalId(0)), + index: Operand::Copy(LocalId(1)), + }, + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(2)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // should call the UTF-8 runtime function, not inline byte-level GEP + assert!( + ir.contains("@__aelys_str_char_at"), + "string index should call 
__aelys_str_char_at:\n{ir}" + ); + // The runtime function must be declared with the right argument types. + // Return convention varies by platform: by-value on Linux, sret on Windows. + let char_at_decl = ir + .lines() + .find(|l| l.contains("declare") && l.contains("@__aelys_str_char_at")) + .expect("__aelys_str_char_at must be declared"); + assert!( + char_at_decl.contains("__aelys_string"), + "char_at must involve %__aelys_string type:\n{char_at_decl}" + ); + assert!( + char_at_decl.contains("ptr") && char_at_decl.contains("i64"), + "char_at must accept (ptr, i64, i64) args:\n{char_at_decl}" + ); + // should not do byte-level GEP into string data + assert!( + !ir.contains("str_idx_ptr"), + "string index must not use byte-level GEP:\n{ir}" + ); +} + +/// Array write: Place::Index on Array(I64, 4), verify GEP + store + bounds check +#[test] +fn array_index_write_generates_gep_and_store() { + // fn probe(idx: i64, val: i64) -> void { + // let arr: [i64; 4] = zeroinit + // arr[idx] = val + // return void + // } + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![ + AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "idx".to_string(), + span: None, + }, + AirParam { + id: LocalId(1), + ty: AirType::I64, + name: "val".to_string(), + span: None, + }, + ], + ret_ty: AirType::Void, + locals: vec![AirLocal { + id: LocalId(2), + ty: AirType::Array(Box::new(AirType::I64), 4), + name: Some("arr".to_string()), + is_mut: true, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::Use(Operand::Const(AirConst::ZeroInit( + AirType::Array(Box::new(AirType::I64), 4), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Index(LocalId(2), Operand::Copy(LocalId(0))), + rvalue: 
Rvalue::Use(Operand::Copy(LocalId(1))), + }, + span: None, + }, + ], + terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // should have GEP + assert!( + ir.contains("getelementptr inbounds"), + "array write should generate GEP:\n{ir}" + ); + // should store the value + assert!( + ir.contains("store i64"), + "array write should generate store:\n{ir}" + ); + // should have bounds check + assert!( + ir.contains("icmp uge"), + "array write should have bounds check:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "array write should call panic on OOB:\n{ir}" + ); +} + +/// Bounds check structure: verify idx_oob has unreachable after __aelys_panic +#[test] +fn bounds_check_structure_has_unreachable_after_panic() { + // Reuse same simple program as array read test + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "idx".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(1), + ty: AirType::Array(Box::new(AirType::I64), 4), + name: Some("arr".to_string()), + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Use(Operand::Const(AirConst::ZeroInit( + AirType::Array(Box::new(AirType::I64), 4), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: 
Place::Local(LocalId(2)), + rvalue: Rvalue::Index { + base: Operand::Copy(LocalId(1)), + index: Operand::Copy(LocalId(0)), + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(2)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // Find the idx_oob block and verify it ends with unreachable + let oob_block_start = ir + .find("idx_oob:") + .expect("should have idx_oob block in IR"); + let after_oob = &ir[oob_block_start..]; + // The block should contain the panic call and then unreachable + let next_label = after_oob.find("\n\n").or_else(|| { + // In some IR formats, blocks are separated by labels + after_oob[9..].find(':').map(|pos| pos + 9) + }); + let oob_block_text = match next_label { + Some(end) => &after_oob[..end], + None => after_oob, + }; + assert!( + oob_block_text.contains("@__aelys_panic"), + "idx_oob block should call __aelys_panic:\n{oob_block_text}" + ); + assert!( + oob_block_text.contains("unreachable"), + "idx_oob block should end with unreachable:\n{oob_block_text}" + ); +} diff --git a/aelys/tests/llvm_air_phi_alloca_tests.rs b/aelys/tests/llvm_air_phi_alloca_tests.rs new file mode 100644 index 0000000..950dfd6 --- /dev/null +++ b/aelys/tests/llvm_air_phi_alloca_tests.rs @@ -0,0 +1,223 @@ +use aelys_air::{ + AirBlock, AirConst, AirFunction, AirIntSize, AirLocal, AirParam, AirProgram, AirStmt, + AirStmtKind, AirTerminator, AirType, BinOp, BlockId, CallingConv, FunctionAttribs, FunctionId, + GcMode, InlineHint, LocalId, Operand, Place, Rvalue, +}; +use aelys_codegen::CodegenContext; +use std::fs; +use tempfile::tempdir; + +fn compile_air_to_verified_ir(program: &AirProgram) -> String { + let dir = tempdir().expect("tempdir 
should be created"); + let ll_path = dir.path().join("module.ll"); + let ll_path_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("phi_tests"); + codegen + .compile(program) + .expect("codegen compilation should succeed"); + codegen + .emit_ir(&ll_path_str) + .expect("llvm ir should be emitted"); + + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = inkwell::context::Context::create(); + let buffer = + inkwell::memory_buffer::MemoryBuffer::create_from_file(&ll_path).expect("ir readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + + ir +} + +fn default_attribs() -> FunctionAttribs { + FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + } +} + +#[test] +fn multi_block_assign_gets_alloca_and_verifies() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::Bool, + name: "cond".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![AirLocal { + id: LocalId(1), + ty: AirType::I64, + // unnamed + immutable would normally skip alloca + name: None, + is_mut: false, + span: None, + }], + blocks: vec![ + AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Branch { + cond: Operand::Copy(LocalId(0)), + then_block: BlockId(1), + else_block: BlockId(2), + }, + }, + AirBlock { + id: BlockId(1), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Use(Operand::Const(AirConst::Int(10, AirIntSize::I64))), + }, + span: None, + }], + terminator: AirTerminator::Goto(BlockId(3)), + }, + AirBlock { + id: BlockId(2), + stmts: vec![AirStmt { + kind: 
AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Use(Operand::Const(AirConst::Int(20, AirIntSize::I64))), + }, + span: None, + }], + terminator: AirTerminator::Goto(BlockId(3)), + }, + AirBlock { + id: BlockId(3), + stmts: vec![], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(1)))), + }, + ], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // The local must use alloca since it's assigned in bb1 and bb2 + assert!( + ir.contains("alloca i64"), + "multi-block assigned local must use alloca:\n{ir}" + ); + // Must have store in both branches + let store_count = ir.matches("store i64").count(); + assert!( + store_count >= 2, + "expected at least 2 stores (one per branch), got {store_count}:\n{ir}" + ); + // Must load in the merge block + assert!( + ir.contains("load i64"), + "merge block must load the local:\n{ir}" + ); +} + +/// Same-block reassignment should not force alloca (value_map handles it fine) +#[test] +fn same_block_reassign_stays_ssa() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![AirParam { + id: LocalId(0), + ty: AirType::I64, + name: "x".to_string(), + span: None, + }], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(1), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + // _1 = x + 1 + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::BinaryOp( + BinOp::Add, + 
Operand::Copy(LocalId(0)), + Operand::Const(AirConst::Int(1, AirIntSize::I64)), + ), + }, + span: None, + }, + // _2 = _1 + 1 (uses _1 from same block, no phi needed) + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(2)), + rvalue: Rvalue::BinaryOp( + BinOp::Add, + Operand::Copy(LocalId(1)), + Operand::Const(AirConst::Int(1, AirIntSize::I64)), + ), + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(2)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // Single-block unnamed temps should not use alloca + assert!( + !ir.contains("alloca"), + "same-block SSA temps must not use alloca:\n{ir}" + ); +} diff --git a/aelys/tests/llvm_codegen_e2e_tests.rs b/aelys/tests/llvm_codegen_e2e_tests.rs new file mode 100644 index 0000000..b4b710e --- /dev/null +++ b/aelys/tests/llvm_codegen_e2e_tests.rs @@ -0,0 +1,1316 @@ +use aelys_air::layout::compute_layouts; +use aelys_air::lower::lower; +use aelys_air::mono::monomorphize; +use aelys_air::passes::copy_elim::eliminate_copies; +use aelys_air::passes::dead_locals::eliminate_dead_locals; +use aelys_air::passes::validate::validate_air; +use aelys_codegen::CodegenContext; +use aelys_driver::compile_file_with_llvm; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_opt::OptimizationLevel; +use aelys_sema::TypeInference; +use aelys_syntax::Source; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::fs; +use std::process::Command; +use tempfile::tempdir; + +fn compile_to_verified_ir(source: &str) -> String { + compile_to_verified_ir_with_opt(source, OptimizationLevel::None) +} + +fn compile_to_verified_ir_with_opt(source: &str, opt: 
OptimizationLevel) -> String { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write(&source_path, source).expect("source should be written"); + compile_file_with_llvm(&source_path, opt, true) + .expect("llvm backend compilation should succeed"); + let ll_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + ir +} + +fn compile_source_to_verified_ir_without_link(source: &str) -> String { + let src = Source::new("", source); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let typed = TypeInference::infer_program(stmts, src).expect("sema failed"); + let mut air = lower(&typed); + air = monomorphize(air).unwrap(); + compute_layouts(&mut air); + eliminate_copies(&mut air); + eliminate_dead_locals(&mut air); + validate_air(&air).expect("AIR should validate"); + + let dir = tempdir().expect("tempdir should be created"); + let ll_path = dir.path().join("module.ll"); + let ll_path_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("e2e_no_link"); + codegen + .compile(&air) + .expect("codegen compilation should succeed"); + codegen + .emit_ir(&ll_path_str) + .expect("llvm ir should be emitted"); + + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + 
.expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + ir +} + +fn all_i64_stores_align8(ir: &str) -> bool { + ir.lines() + .filter(|line| line.contains("store i64")) + .all(|line| line.contains("align 8")) +} + +fn has_back_edge(ir: &str) -> bool { + let mut current_block = None; + for line in ir.lines() { + let trimmed = line.trim_start(); + if let Some(id) = parse_block_label(trimmed) { + current_block = Some(id); + continue; + } + let Some(from) = current_block else { + continue; + }; + for target in parse_branch_targets(trimmed) { + if target <= from { + return true; + } + } + } + false +} + +fn parse_block_label(line: &str) -> Option { + let rest = line.strip_prefix("bb")?; + let (digits, _) = rest.split_once(':')?; + if digits.is_empty() || !digits.chars().all(|c| c.is_ascii_digit()) { + return None; + } + digits.parse().ok() +} + +fn parse_branch_targets(line: &str) -> Vec { + let mut targets = Vec::new(); + let mut remaining = line; + while let Some(pos) = remaining.find("%bb") { + let after = &remaining[(pos + 3)..]; + let digits: String = after.chars().take_while(|c| c.is_ascii_digit()).collect(); + if digits.is_empty() { + if after.is_empty() { + break; + } + remaining = &after[1..]; + continue; + } + if let Ok(id) = digits.parse() { + targets.push(id); + } + remaining = &after[digits.len()..]; + } + targets +} + +fn executable_path_for(source_path: &std::path::Path) -> std::path::PathBuf { + let mut output = source_path.with_extension(""); + if cfg!(windows) { + output.set_extension("exe"); + } + output +} + +fn linker_unavailable(error: &str) -> bool { + error.contains("failed to run `lld-link`: program not found") + || error.contains("failed to run `link`: program not found") + || error.contains("failed with status Some(-1073741819)") +} + +#[test] +fn llvm_returns_integer_constant() { + let ir = compile_to_verified_ir("fn constant() -> i64 { return 42 }"); + 
assert!(ir.contains("ret i64")); +} + +#[test] +fn llvm_adds_two_parameters() { + let ir = compile_to_verified_ir("fn add(a: i64, b: i64) -> i64 { return a + b }"); + assert!(ir.contains("add i64")); + assert!(all_i64_stores_align8(&ir)); +} + +#[test] +fn llvm_generates_conditional_branch() { + let ir = compile_to_verified_ir("fn choose(x: i64) -> i64 { if x > 0 { return 1 } return 2 }"); + assert!(ir.contains("br i1")); +} + +#[test] +fn llvm_generates_while_back_edge() { + let ir = compile_to_verified_ir( + r#" +fn count(n: i64) -> i64 { + let mut i: i64 = 0 + while i < n { + i = i + 1 + } + return i +} +"#, + ); + assert!(has_back_edge(&ir)); +} + +#[test] +fn llvm_generates_sitofp_for_i32_to_f64() { + let ir = compile_to_verified_ir( + r#" +fn cast_it(x: i32) -> f64 { + return x as f64 +} +"#, + ); + assert!(ir.contains("sitofp"), "{ir}"); +} + +#[test] +fn llvm_generates_gep_for_struct_init_and_access() { + let ir = compile_to_verified_ir( + r#" +struct Point { x: i64, y: i64 } +fn read_x() -> i64 { + let p = Point { x: 1, y: 2 } + return p.x +} +"#, + ); + assert!(ir.contains("getelementptr")); +} + +#[test] +fn llvm_generates_function_call() { + let ir = compile_to_verified_ir( + "fn callee(x: i64) -> i64 { return x } fn caller() -> i64 { return callee(7) }", + ); + assert!(ir.contains("call")); + assert!(ir.contains("@callee")); +} + +#[test] +fn llvm_emits_global_string_constant() { + let ir = compile_to_verified_ir("fn hello() -> string { return \"hello\" }"); + assert!(ir.contains("@str_")); + assert!(ir.contains("hello")); +} + +#[test] +fn llvm_lowers_println_to_aelys_write_with_slice_abi() { + let ir = compile_to_verified_ir("fn greet() { println(\"Hello\") }"); + assert!(ir.contains("declare void @__aelys_write(ptr, i64)"), "{ir}"); + assert!(!ir.contains("declare i64 @println"), "{ir}"); + assert!(!ir.contains("declare void @println"), "{ir}"); + assert!(!ir.contains("declare i64 @print"), "{ir}"); + assert!(!ir.contains("declare void @print"), 
"{ir}"); + assert!(ir.contains("c\"Hello\\00\""), "{ir}"); + assert!(!ir.contains("c\"\\00Hello"), "{ir}"); + assert!(!ir.contains(", i64 0, i64 1"), "{ir}"); + assert!(ir.contains("call void @__aelys_write"), "{ir}"); + assert!(ir.contains("i64 5"), "{ir}"); +} + +#[test] +fn echo_example() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let s = "Hello" + println(s) + return s.len +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!(output.stdout == b"Hello\n" || output.stdout == b"Hello\r\n"); + assert_eq!(output.status.code().unwrap_or(-1), 5); +} + +#[test] +fn internal_nul_preserved() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + println("A\0B") + return 0 +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!(output.stdout == b"A\0B\n" || output.stdout == b"A\0B\r\n"); +} + +#[test] +fn len_on_temporary_literal_expression() { + let dir = 
tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + return "hello".len +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert_eq!(output.status.code().unwrap_or(-1), 5); +} + +#[test] +fn len_in_callee_on_string_parameter() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn sink(s: string) -> i64 { + return s.len +} + +fn main() -> i64 { + return sink("Hello") +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert_eq!(output.status.code().unwrap_or(-1), 5); +} + +#[test] +fn llvm_main_fn_value_uses_backend_symbol_mapping() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Holder { + Value(T), + Empty, +} + +fn bounce(h: Holder i64>) -> Holder i64> { + return h +} + +fn main() -> i64 { + let h: Holder i64> = Holder::Value(main) + let out = bounce(h) + return match out { + Holder::Value(_) => 42 + Holder::Empty => 0 + } +} +"#, + ); + assert!(ir.contains("@__aelys_main"), "{ir}"); + 
assert!(ir.contains("ptr @__aelys_main") || ir.contains("@__aelys_main"), "{ir}"); +} + +#[test] +fn llvm_rejects_main_with_parameters_for_native_entry() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main(x: i64) -> i64 { + return x +} +"#, + ) + .expect("source should be written"); + let err = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect_err("llvm backend compilation should fail"); + let rendered = err.to_string(); + assert!( + rendered.contains("invalid native entry: main must have no parameters (found 1)"), + "{rendered}" + ); +} + +#[test] +fn llvm_rejects_main_returning_i32_for_native_entry() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i32 { + return 1 +} +"#, + ) + .expect("source should be written"); + let err = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect_err("llvm backend compilation should fail"); + let rendered = err.to_string(); + // sema now catches the return type mismatch (i64 literal vs i32 annotation) before codegen can check the native entry constraint <3 + assert!( + rendered.contains("type mismatch") || rendered.contains("invalid native entry"), + "expected type mismatch or native entry error, got: {rendered}" + ); +} + +#[test] +fn llvm_preserves_sema_diagnostic_for_return_null_in_i64_function() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn foo() -> i64 { + for i in 0..10 { + return null + } +} +"#, + ) + .expect("source should be written"); + let err = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect_err("compilation should fail at sema stage"); + let rendered = err.to_string(); + assert!( + 
rendered.contains("expected `i64`, found `null`"), + "{rendered}" + ); + assert!( + !rendered.contains("[llvm-backend]"), + "sema error must not be re-labeled as llvm backend: {rendered}" + ); + assert!( + !rendered.contains(":1:1"), + "sema error should keep its original source span: {rendered}" + ); +} + +#[test] +fn llvm_sema_diagnostic_reports_multiple_errors_separately() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn first() -> i64 { + return null +} + +fn second() -> i64 { + return null +} +"#, + ) + .expect("source should be written"); + let err = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect_err("compilation should fail at sema stage"); + let rendered = err.to_string(); + // each error should be a separate diagnostic with its own error code + let error_count = rendered.matches("error[E0301]").count(); + assert!( + error_count >= 2, + "should have at least 2 separate error diagnostics, got {}: {rendered}", + error_count + ); + assert!( + rendered.contains("first"), + "should mention function 'first': {rendered}" + ); + assert!( + rendered.contains("second"), + "should mention function 'second': {rendered}" + ); +} + +#[test] +fn llvm_native_entry_maps_negative_i64_main_exit_code_to_u8() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + return -1 +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled 
executable should run"); + assert_eq!(output.status.code().unwrap_or(-1), 255); +} + +#[test] +fn llvm_native_entry_returns_zero_for_void_main() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> void { + let x: i64 = 1 +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert_eq!(output.status.code().unwrap_or(-1), 0); +} + +#[test] +fn llvm_nested_generic_enum_type_arg_compiles() { + let ir = compile_to_verified_ir( + r#" +enum Pair { + Both(A, B), + Neither, +} + +enum Boxed { + Value(T), + Empty, +} + +fn first_from_box(b: Boxed>) -> i64 { + return match b { + Boxed::Value(p) => match p { + Pair::Both(x, _) => x, + Pair::Neither => -1, + }, + Boxed::Empty => -2, + } +} +"#, + ); + assert!( + ir.contains("__mono_Boxed_enum___mono_Pair_i64$str"), + "nested generic enum monomorphization should survive into IR:\n{ir}" + ); +} + +#[test] +fn llvm_nested_generic_enum_unit_variant_only_compiles() { + let ir = compile_to_verified_ir( + r#" +enum Pair { + Both(A, B), + Neither, +} + +enum Boxed { + Value(T), + Empty, +} + +fn get_empty() -> Boxed> { + return Boxed::Empty +} +"#, + ); + assert!( + ir.contains("__mono_Boxed_enum___mono_Pair_i64$str"), + "unit-only nested generic enum should still monomorphize parent enum:\n{ir}" + ); + assert!( + ir.contains("__mono_Pair_i64$str"), + "unit-only nested generic enum should also synthesize nested enum def:\n{ir}" + ); +} + +#[test] +fn 
llvm_generic_enum_unit_variant_with_fnptr_type_arg_compiles() { + let ir = compile_to_verified_ir( + r#" +enum Holder { + Value(T), + Empty, +} + +fn apply_default() -> Holder i64> { + return Holder::Empty +} +"#, + ); + assert!( + ir.contains("__mono_Holder_fnptr$i64$Ri64"), + "fnptr-instantiated generic enum should survive into IR:\n{ir}" + ); +} + +#[test] +fn llvm_generic_enum_named_fn_payload_uses_fnptr_mono() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Holder { + Value(T), + Empty, +} + +fn inc(x: i64) -> i64 { + return x + 1 +} + +fn call_holder(h: Holder i64>) -> i64 { + return match h { + Holder::Value(f) => f(41) + Holder::Empty => 0 + } +} + +fn main() -> i64 { + let h: Holder i64> = Holder::Value(inc) + return call_holder(h) +} +"#, + ); + assert!( + ir.contains("__mono_Holder_fnptr$i64$Ri64"), + "named function payload enum should use fnptr mono in IR:\n{ir}" + ); + assert!( + !ir.contains("__mono_Holder_ptr_void"), + "named function payload enum must not use ptr_void mono in IR:\n{ir}" + ); +} + +#[test] +#[ignore = "multi-module compilation not yet supported by LLVM backend"] +fn llvm_multi_module_strings_compile_and_run() { + let dir = tempdir().expect("tempdir should be created"); + let module_path = dir.path().join("strings.aelys"); + let source_path = dir.path().join("main.aelys"); + + fs::write( + &module_path, + r#" +pub fn alpha() -> string { + return "A" +} + +pub fn beta() -> string { + return "B" +} +"#, + ) + .expect("module source should be written"); + + fs::write( + &source_path, + r#" +needs strings + +fn main() -> i64 { + strings.alpha() + strings.beta() + return 0 +} +"#, + ) + .expect("main source should be written"); + + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let 
exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!( + output.stdout.is_empty(), + "unexpected stdout: {:?}", + output.stdout + ); + assert_eq!(output.status.code().unwrap_or(-1), 0); +} + +#[test] +fn llvm_void_function_without_explicit_return_uses_ret_void() { + let ir = compile_to_verified_ir( + r#" +fn unit_like() -> void { + let x: i64 = 1 +} +"#, + ); + assert!(ir.contains("ret void"), "{ir}"); + assert!(!ir.contains("ret i64 0")); + assert!(all_i64_stores_align8(&ir)); +} + +/// String indexing compiles through full pipeline and delegates to runtime. +#[test] +fn llvm_string_index_compiles_and_calls_runtime() { + let ir = compile_to_verified_ir( + r#" +fn char_at(s: string, i: i64) -> string { + return s[i] +} +"#, + ); + assert!( + ir.contains("@__aelys_str_char_at"), + "string index should call __aelys_str_char_at:\n{ir}" + ); + // Return convention varies by platform: by-value on Linux, sret on Windows. + let char_at_decl = ir + .lines() + .find(|l| l.contains("declare") && l.contains("@__aelys_str_char_at")) + .expect("__aelys_str_char_at must be declared"); + assert!( + char_at_decl.contains("__aelys_string"), + "char_at must involve %__aelys_string type:\n{char_at_decl}" + ); + assert!( + char_at_decl.contains("ptr") && char_at_decl.contains("i64"), + "char_at must accept (ptr, i64, i64) args:\n{char_at_decl}" + ); +} + +/// Array literal creation + index read compiles through full pipeline. 
+#[test] +fn llvm_array_literal_index_read_compiles() { + let ir = compile_to_verified_ir( + r#" +fn second() -> i64 { + let arr = [10, 20, 30] + return arr[1] +} +"#, + ); + // stack-allocated array: alloca + stores + GEP + assert!( + ir.contains("alloca [3 x i64]"), + "array literal should use alloca [3 x i64]:\n{ir}" + ); + assert!( + ir.contains("store i64 10"), + "array literal should store first element:\n{ir}" + ); + assert!( + ir.contains("store i64 20"), + "array literal should store second element:\n{ir}" + ); + assert!( + ir.contains("store i64 30"), + "array literal should store third element:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "array index should generate GEP:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "array index should panic on OOB:\n{ir}" + ); +} + +/// Array index write compiles through full pipeline. +#[test] +fn llvm_array_index_write_compiles() { + let ir = compile_to_verified_ir( + r#" +fn mutate() -> i64 { + let mut arr = [10, 20, 30] + arr[0] = 99 + return arr[0] +} +"#, + ); + assert!( + ir.contains("alloca [3 x i64]"), + "array should use stack allocation:\n{ir}" + ); + assert!( + ir.contains("store i64 99"), + "array index write should generate store for new value:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "array index write should generate GEP:\n{ir}" + ); +} + +/// Index as function argument compiles (indexing in expression position). +#[test] +fn llvm_index_in_function_argument_compiles() { + let ir = compile_to_verified_ir( + r#" +fn identity(x: i64) -> i64 { return x } +fn use_index() -> i64 { + let arr = [10, 20, 30] + return identity(arr[1]) +} +"#, + ); + assert!( + ir.contains("@identity"), + "should call identity function:\n{ir}" + ); + assert!( + ir.contains("getelementptr"), + "index in arg position should generate GEP:\n{ir}" + ); +} + +/// Array index with variable (not constant) generates bounds check. 
+#[test] +fn llvm_array_index_with_variable_has_bounds_check() { + let ir = compile_to_verified_ir( + r#" +fn at(arr: [i64; 3], i: i64) -> i64 { + return arr[i] +} +"#, + ); + assert!( + ir.contains("icmp uge"), + "variable index should have bounds check:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "variable index should panic on OOB:\n{ir}" + ); + assert!(ir.contains("idx_oob:"), "should have idx_oob block:\n{ir}"); + assert!(ir.contains("idx_ok:"), "should have idx_ok block:\n{ir}"); +} + +/// Index read + write in a void function (swap pattern) +/// Check if it does produces `ret void` instead of `ret ptr null` +#[test] +fn llvm_array_swap_pattern_compiles() { + let ir = compile_to_verified_ir( + r#" +fn swap(mut arr: [i64; 3], i: i64, j: i64) -> void { + let tmp = arr[i] + arr[i] = arr[j] + arr[j] = tmp +} +"#, + ); + // Multiple GEPs and stores for the swap + let gep_count = ir.matches("getelementptr").count(); + assert!( + gep_count >= 3, + "swap should generate at least 3 GEPs (read i, read j, write i, write j), got {gep_count}:\n{ir}" + ); + let store_count = ir.matches("store i64").count(); + assert!( + store_count >= 2, + "swap should generate at least 2 stores, got {store_count}:\n{ir}" + ); + // Void function must emit ret void, not ret ptr null + assert!( + ir.contains("ret void"), + "void function with index assignment should emit ret void:\n{ir}" + ); +} + +/// String indexing on a literal compiles. 
+#[test] +fn llvm_string_literal_index_compiles() { + let ir = compile_to_verified_ir( + r#" +fn first_char() -> string { + let s = "hello" + return s[0] +} +"#, + ); + assert!( + ir.contains("@__aelys_str_char_at"), + "string literal index should call __aelys_str_char_at:\n{ir}" + ); +} + +#[test] +fn llvm_loop_with_array_index_compiles() { + let ir = compile_to_verified_ir( + r#" +fn sum_array(arr: [i64; 10], n: i64) -> i64 { + let mut total: i64 = 0 + let mut i: i64 = 0 + while i < n { + total = total + arr[i] + i = i + 1 + } + return total +} +"#, + ); + assert!(has_back_edge(&ir), "loop should have a back edge:\n{ir}"); + assert!( + ir.contains("getelementptr"), + "loop body index should generate GEP:\n{ir}" + ); + assert!( + ir.contains("icmp uge"), + "loop body index should have bounds check:\n{ir}" + ); +} + +#[test] +fn llvm_void_function_with_assignment_produces_ret_void() { + let ir = compile_to_verified_ir( + r#" +fn set_it(x: i64) -> void { + let mut y: i64 = 0 + y = x +} +"#, + ); + assert!( + ir.contains("ret void"), + "void function with assignment should emit ret void:\n{ir}" + ); + assert!( + !ir.contains("ret ptr null"), + "void function must not emit ret ptr null:\n{ir}" + ); +} + +#[test] +fn llvm_void_function_with_index_assign_produces_ret_void() { + let ir = compile_to_verified_ir( + r#" +fn fill(mut arr: [i64; 3], i: i64, val: i64) -> void { + arr[i] = val +} +"#, + ); + assert!( + ir.contains("ret void"), + "void function with index assign should emit ret void:\n{ir}" + ); + assert!( + !ir.contains("ret ptr null"), + "void function must not emit ret ptr null:\n{ir}" + ); +} + +/// Integer division emits a div-zero check in the IR. 
+#[test] +fn llvm_int_div_emits_div_zero_check() { + let ir = compile_to_verified_ir( + r#" +fn divide(a: i64, b: i64) -> i64 { + return a / b +} +"#, + ); + assert!( + ir.contains("icmp eq"), + "integer div should compare divisor to zero:\n{ir}" + ); + assert!( + ir.contains("div_zero:"), + "should have div_zero trap block:\n{ir}" + ); + assert!( + ir.contains("div_ok:"), + "should have div_ok continuation block:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "div-by-zero should call __aelys_panic:\n{ir}" + ); +} + +/// Integer modulo also emits a div-zero check. +#[test] +fn llvm_int_rem_emits_div_zero_check() { + let ir = compile_to_verified_ir( + r#" +fn modulo(a: i64, b: i64) -> i64 { + return a % b +} +"#, + ); + assert!( + ir.contains("div_zero:"), + "integer rem should have div_zero trap block:\n{ir}" + ); + assert!( + ir.contains("@__aelys_panic"), + "integer rem should call __aelys_panic:\n{ir}" + ); +} + +/// Unsigned division also gets the check. +#[test] +fn llvm_unsigned_div_emits_div_zero_check() { + let ir = compile_to_verified_ir( + r#" +fn udivide(a: u64, b: u64) -> u64 { + return a / b +} +"#, + ); + assert!( + ir.contains("div_zero:"), + "unsigned div should have div_zero trap block:\n{ir}" + ); + assert!( + ir.contains("udiv"), + "unsigned div should emit udiv instruction:\n{ir}" + ); +} + +/// Float division does NOT emit a div-zero check (IEEE 754 well-defined). +#[test] +fn llvm_float_div_has_no_div_zero_check() { + let ir = compile_to_verified_ir( + r#" +fn fdivide(a: f64, b: f64) -> f64 { + return a / b +} +"#, + ); + assert!( + !ir.contains("div_zero:"), + "float div should NOT have div_zero check:\n{ir}" + ); + assert!( + ir.contains("fdiv"), + "float div should emit fdiv instruction:\n{ir}" + ); +} + +/// Div-zero check survives -O2 — the panic call must not be optimized away. 
+#[test] +fn llvm_div_zero_check_survives_o2() { + let ir = compile_to_verified_ir_with_opt( + r#" +fn divide(a: i64, b: i64) -> i64 { + return a / b +} +"#, + OptimizationLevel::Standard, + ); + assert!( + ir.contains("@__aelys_panic"), + "div-zero check must survive -O2 — panic must not be eliminated:\n{ir}" + ); +} + +/// Runtime: division by zero actually terminates the process (non-zero exit). +#[test] +fn llvm_div_by_zero_runtime_panics() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let a: i64 = 42 + let b: i64 = 0 + return a / b +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::None, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!( + !output.status.success(), + "division by zero should cause non-zero exit" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("division by zero"), + "panic message should contain 'division by zero', got: {stderr}" + ); +} + +/// Runtime: division by zero also panics at -O2. 
+#[test] +fn llvm_div_by_zero_runtime_panics_at_o2() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let a: i64 = 42 + let b: i64 = 0 + return a / b +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!( + !output.status.success(), + "division by zero at -O2 should cause non-zero exit" + ); +} + +/// Runtime: modulo by zero also panics. +#[test] +fn llvm_rem_by_zero_runtime_panics() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let a: i64 = 42 + let b: i64 = 0 + return a % b +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::None, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert!( + !output.status.success(), + "modulo by zero should cause non-zero exit" + ); +} + +/// Runtime: normal division still works correctly (no regression). 
+#[test] +fn llvm_normal_div_still_works() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let a: i64 = 100 + let b: i64 = 4 + return a / b +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::None, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + assert_eq!( + output.status.code().unwrap_or(-1), + 25, + "100 / 4 should return 25" + ); +} + +/// Runtime: division by runtime-computed zero in a loop panics. +#[test] +fn llvm_div_by_zero_in_loop_panics() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +fn main() -> i64 { + let mut sum: i64 = 0 + for i in 0..3 { + let divisor: i64 = 2 - i + sum = sum + 10 / divisor + } + return sum +} +"#, + ) + .expect("source should be written"); + if let Err(err) = compile_file_with_llvm(&source_path, OptimizationLevel::None, true) { + if linker_unavailable(&err.to_string()) { + return; + } + panic!("llvm backend compilation should succeed: {err}"); + } + + if !executable_path_for(&source_path).is_file() { + return; + } + + let exe_path = executable_path_for(&source_path); + let output = Command::new(&exe_path) + .output() + .expect("compiled executable should run"); + // i=0: 10/2=5, i=1: 10/1=10, i=2: 10/0 → panic + assert!( + !output.status.success(), + "division by zero in loop iteration should panic" + ); +} diff --git a/aelys/tests/llvm_codegen_quality_tests.rs b/aelys/tests/llvm_codegen_quality_tests.rs new file mode 100644 index 
0000000..ee96997
--- /dev/null
+++ b/aelys/tests/llvm_codegen_quality_tests.rs
@@ -0,0 +1,236 @@
+use aelys_driver::compile_file_with_llvm;
+use aelys_opt::OptimizationLevel;
+use inkwell::context::Context;
+use inkwell::memory_buffer::MemoryBuffer;
+use std::fs;
+use tempfile::tempdir;
+
+/// Compiles `source` at `OptimizationLevel::None` and returns the verified LLVM IR text.
+fn compile_to_verified_ir(source: &str) -> String {
+    compile_to_verified_ir_with_opt(source, OptimizationLevel::None)
+}
+
+/// Writes `source` to a temporary `module.aelys`, runs the LLVM driver on it at `opt`,
+/// and returns the text of the `module.ll` file found next to the source.
+///
+/// The IR file is also parsed back through inkwell and passed to `module.verify()`,
+/// so every caller implicitly checks that the emitted IR is well-formed.
+///
+/// # Panics
+///
+/// Panics when any step fails: temp dir creation, writing the source, compilation,
+/// reading the `.ll` file, parsing it, or module verification.
+fn compile_to_verified_ir_with_opt(source: &str, opt: OptimizationLevel) -> String {
+    let dir = tempdir().expect("tempdir should be created");
+    let source_path = dir.path().join("module.aelys");
+    fs::write(&source_path, source).expect("source should be written");
+
+    // NOTE(review): the trailing `true` is presumably what makes the driver leave the
+    // `.ll` file beside the source; confirm against `compile_file_with_llvm`.
+    compile_file_with_llvm(&source_path, opt, true)
+        .expect("llvm backend compilation should succeed");
+
+    let ll_path = source_path.with_extension("ll");
+    let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated");
+
+    let context = Context::create();
+    let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable");
+    let module = context
+        .create_module_from_ir(buffer)
+        .expect("llvm ir should parse into a module");
+    module
+        .verify()
+        .expect("module.verify() should succeed for generated ir");
+
+    ir
+}
+
+/// Asserts that every IR line containing `needle` carries exactly `align <expected_align>`.
+///
+/// A needle that matches no line passes vacuously, so callers can list instruction
+/// shapes that a given program may or may not produce.
+///
+/// # Panics
+///
+/// Panics on the first matching line whose alignment is missing or different.
+fn assert_all_aligned(ir: &str, needle: &str, expected_align: u32) {
+    for line in ir.lines().filter(|line| line.contains(needle)) {
+        assert!(
+            line_has_exact_align(line, expected_align),
+            "bad alignment for `{needle}`: {line}"
+        );
+    }
+}
+
+/// Returns `true` when `line` contains an `align <n>` annotation whose number is
+/// exactly `expected`.
+///
+/// The digits after `align ` are parsed as a whole number, so `align 16` is not
+/// mistaken for `align 1` the way a plain substring search would.
+fn line_has_exact_align(line: &str, expected: u32) -> bool {
+    const KEY: &str = "align ";
+    line.match_indices(KEY).any(|(at, _)| {
+        let rest = &line[at + KEY.len()..];
+        let end = rest
+            .find(|c: char| !c.is_ascii_digit())
+            .unwrap_or(rest.len());
+        rest[..end].parse::<u32>().ok() == Some(expected)
+    })
+}
+
+/// Unoptimized IR must use the expected alignment on every `alloca`, `store`, and
+/// `load` of each primitive type and of the string aggregate.
+#[test]
+fn llvm_uses_expected_alignment_for_mutable_locals() {
+    let ir = compile_to_verified_ir_with_opt(
+        r#"
+fn int_align(n: i64) -> i64 {
+    let mut a64: i64 = 0
+    let mut a32: i32 = 0
+    let mut a16: i16 = 0
+    let mut a8: i8 = 0
+    let mut ab: bool = false
+    while a64 < n {
+        a8 = a8 + (1 as i8)
+        a16 = a16 + (1 as i16)
+        a32 = a32 + 1
+        a64 = a64 + 1
+        if ab { ab = false } else { ab = true }
+    }
+    return a64 + (a32 as i64) + (a16 as i64) + (a8 as i64) + (ab as i64)
+}
+
+fn float_align(n: i64) -> f64 {
+    let mut af32: f32 = 0.0 as f32
+    let mut af64: f64 = 0.0
+    let mut i: i64 = 0
+    while i < n {
+        af32 = af32 + (1.0 as f32)
+        af64 = af64 + 1.0
+        i = i + 1
+    }
+    return af64 + (af32 as f64)
+}
+
+fn string_align(s: string, n: i64) -> string {
+    let mut sp: string = s
+    let mut i: i64 = 0
+    while i < n {
+        sp = s
+        i = i + 1
+    }
+    return sp
+}
+"#,
+        OptimizationLevel::None,
+    );
+
+    // LLVM type spelling paired with the alignment (in bytes) the backend must emit.
+    const EXPECTED_ALIGNMENTS: [(&str, u32); 8] = [
+        ("i8", 1),
+        ("i16", 2),
+        ("i32", 4),
+        ("i64", 8),
+        ("i1", 1),
+        ("float", 4),
+        ("double", 8),
+        ("%__aelys_string", 8),
+    ];
+
+    // The trailing `,` / ` ` in each needle keeps `i1` from also matching `i16`.
+    for (ty, align) in EXPECTED_ALIGNMENTS {
+        assert_all_aligned(&ir, &format!("alloca {ty},"), align);
+        assert_all_aligned(&ir, &format!("store {ty} "), align);
+        assert_all_aligned(&ir, &format!("load {ty},"), align);
+    }
+
+    // Guard against the checks above passing vacuously because nothing was spilled.
+    assert!(
+        ir.contains("alloca"),
+        "mutable locals in loops must produce alloca instructions:\n{ir}"
+    );
+}
+
+/// Functions that only read their parameters must not allocate any stack slot.
+#[test]
+fn llvm_ssa_params_skip_alloca() {
+    let ir = compile_to_verified_ir(
+        r#"
+fn add(a: i64, b: i64) -> i64 {
+    return a + b
+}
+
+fn identity(x: i64) -> i64 {
+    return x
+}
+"#,
+    );
+
+    assert!(
+        !ir.contains("alloca"),
+        "pure SSA functions must not generate alloca:\n{ir}"
+    );
+}
+
+/// Recursion, a loop with mixed integer widths, and struct field access each
+/// produce IR that passes the verification done by `compile_to_verified_ir`.
+#[test]
+fn llvm_verify_passes_for_fibonacci_sum_and_vec2_length() {
+    let programs = [
+        r#"
+fn fibonacci(n: i32) -> i64 {
+    if n <= 1 {
+        return n as i64
+    }
+    return fibonacci(n - 1) + fibonacci(n - 2)
+}
+"#,
+        r#"
+fn sum(n: i32) -> i64 {
+    let mut acc: i64 = 0
+    let mut i: i32 = 0
+    while i < n {
+        acc = acc + i as i64
+        i = i + 1
+    }
+    return acc
+}
+"#,
+        r#"
+struct Vec2 { x: f64, y: f64 }
+
+fn vec2_length(v: Vec2) -> f64 {
+    return v.x * v.x + v.y * v.y
+}
+"#,
+    ];
+
+    for program in programs {
+        let ir = compile_to_verified_ir(program);
+        assert!(
+            ir.contains("define"),
+            "verified ir should contain at least one function definition:\n{ir}"
+        );
+    }
+}
+
+/// The calling convention on a call site must match the callee's definition.
+#[test]
+fn llvm_calls_match_fastcc_declarations() {
+    let ir = compile_to_verified_ir_with_opt(
+        r#"
+fn callee(x: i64) -> i64 {
+    return x + 1
+}
+
+fn caller(v: i64) -> i64 {
+    return callee(v)
+}
+"#,
+        OptimizationLevel::None,
+    );
+
+    assert!(
+        ir.contains("define fastcc i64 @callee"),
+        "callee should be defined with fastcc:\n{ir}"
+    );
+    assert!(
+        ir.contains("call fastcc i64 @callee"),
+        "call site should use fastcc:\n{ir}"
+    );
+}
+
+/// The same loop is compiled at both levels: unoptimized IR keeps its stack slots,
+/// while the Standard level must have replaced them with phi nodes.
+#[test]
+fn llvm_o2_eliminates_allocas_via_mem2reg() {
+    let source = r#"
+fn sum(n: i64) -> i64 {
+    let mut acc: i64 = 0
+    let mut i: i64 = 0
+    while i < n {
+        acc = acc + i
+        i = i + 1
+    }
+    return acc
+}
+"#;
+    let ir_o0 = compile_to_verified_ir_with_opt(source, OptimizationLevel::None);
+    let ir_o2 = compile_to_verified_ir_with_opt(source, OptimizationLevel::Standard);
+
+    // O0 must keep allocas (no mem2reg)
+    assert!(
+        ir_o0.contains("alloca"),
+        "O0 should preserve alloca instructions:\n{ir_o0}"
+    );
+
+    // O2 should run mem2reg and eliminate allocas in favor of phi nodes
+    assert!(
+        !ir_o2.contains("alloca"),
+        "O2 should eliminate allocas via mem2reg:\n{ir_o2}"
+    );
+    assert!(
+        ir_o2.contains("phi"),
+        "O2 should introduce phi nodes after mem2reg:\n{ir_o2}"
+    );
+}
diff --git a/aelys/tests/llvm_global_tests.rs b/aelys/tests/llvm_global_tests.rs
new file mode 100644
index 0000000..1247af9
--- /dev/null
+++
b/aelys/tests/llvm_global_tests.rs @@ -0,0 +1,390 @@ +use aelys_air::layout::compute_layouts; +use aelys_air::lower::lower; +use aelys_air::mono::monomorphize; +use aelys_air::passes::copy_elim::eliminate_copies; +use aelys_air::passes::dead_locals::eliminate_dead_locals; +use aelys_air::passes::validate::validate_air; +use aelys_codegen::CodegenContext; +use aelys_driver::compile_file_with_llvm; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_opt::OptimizationLevel; +use aelys_sema::TypeInference; +use aelys_syntax::Source; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::collections::HashSet; +use std::fs; +use tempfile::tempdir; + +fn compile_source_to_verified_ir_without_link(source: &str) -> String { + let src = Source::new("", source); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let typed = TypeInference::infer_program(stmts, src).expect("sema failed"); + let mut air = lower(&typed); + air = monomorphize(air).unwrap(); + compute_layouts(&mut air); + eliminate_copies(&mut air); + eliminate_dead_locals(&mut air); + validate_air(&air).expect("AIR should validate"); + + let dir = tempdir().expect("tempdir should be created"); + let ll_path = dir.path().join("module.ll"); + let ll_path_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("global_no_link"); + codegen + .compile(&air) + .expect("codegen compilation should succeed"); + codegen + .emit_ir(&ll_path_str) + .expect("llvm ir should be emitted"); + + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + 
.expect("module.verify() should succeed for generated ir"); + ir +} + +fn lower_optimized_full(source: &str) -> aelys_air::AirProgram { + let src = Source::new("", source); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let inference = TypeInference::infer_program_full( + stmts, + src, + HashSet::new(), + HashSet::from(["print".to_string(), "println".to_string()]), + ) + .expect("sema failed"); + let mut opt = aelys_opt::Optimizer::new(OptimizationLevel::Standard); + let typed = opt.optimize(inference.program); + lower(&typed) +} + +#[test] +fn llvm_lowers_const_global_reads_to_real_global_storage() { + let ir = compile_source_to_verified_ir_without_link( + r#" +let g = 7 + +fn main() -> i64 { + return g +} +"#, + ); + assert!(ir.contains("@__aelys_global_g = internal global i64 7"), "{ir}"); + // Aelys-convention main receives an implicit env ptr; check the function exists with fastcc. + let main_decl = ir + .lines() + .find(|l| l.contains("define fastcc i64 @__aelys_main")) + .expect("__aelys_main must be defined"); + assert!( + main_decl.contains("fastcc"), + "__aelys_main must use fastcc:\n{main_decl}" + ); + assert!(ir.contains("load i64, ptr @__aelys_global_g"), "{ir}"); +} + +#[test] +fn llvm_lowers_fnptr_global_to_function_symbol() { + let ir = compile_source_to_verified_ir_without_link( + r#" +let f: fn() -> i64 = main + +fn main() -> i64 { + return f() +} +"#, + ); + // Aelys FnPtr globals are fat pointers { fn_ptr, env_ptr }; named fns have null env. 
+ assert!( + ir.contains("@__aelys_global_f = internal global { ptr, ptr } { ptr @__aelys_main, ptr null }"), + "{ir}" + ); + assert!(ir.contains("load { ptr, ptr }, ptr @__aelys_global_f"), "{ir}"); + assert!(ir.contains("extractvalue { ptr, ptr }"), "{ir}"); + assert!(ir.contains("call fastcc i64 %"), "{ir}"); + assert!(!ir.contains("declare i64 @f()"), "{ir}"); +} + +#[test] +fn llvm_lowers_fnptr_global_alias_to_same_function_symbol() { + let ir = compile_source_to_verified_ir_without_link( + r#" +let g: fn() -> i64 = main +let h: fn() -> i64 = g + +fn main() -> i64 { + return h() +} +"#, + ); + // Both aliases should resolve to the same fat pointer { fn_ptr, null_env }. + assert!( + ir.contains("@__aelys_global_g = internal global { ptr, ptr } { ptr @__aelys_main, ptr null }"), + "{ir}" + ); + assert!( + ir.contains("@__aelys_global_h = internal global { ptr, ptr } { ptr @__aelys_main, ptr null }"), + "{ir}" + ); + assert!(ir.contains("load { ptr, ptr }, ptr @__aelys_global_h"), "{ir}"); + assert!(!ir.contains("unknown function 'g'"), "{ir}"); + assert!(!ir.contains("declare i64 @g()"), "{ir}"); +} + +#[test] +fn llvm_lowers_data_enum_global_alias_to_same_const_aggregate() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::None +let h: Option = g + +fn main() -> i64 { + return match h { + Option::Some(v) => v + Option::None => 9 + } +} +"#, + ); + assert!( + ir.contains("@__aelys_global_g = internal global %__aelys_enum___mono_Option_i64 { i32 1"), + "{ir}" + ); + assert!( + ir.contains("@__aelys_global_h = internal global %__aelys_enum___mono_Option_i64 { i32 1"), + "{ir}" + ); + assert!(ir.contains("load %__aelys_enum___mono_Option_i64, ptr @__aelys_global_h"), "{ir}"); +} + +#[test] +fn llvm_lowers_simple_enum_global_to_i32_storage() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Color { + Red, + Green, +} + +let c: Color = Color::Green + +fn read_color() -> i64 
{ + return match c { + Color::Red => 1 + Color::Green => 2 + } +} +"#, + ); + assert!(ir.contains("@__aelys_global_c = internal global i32 1"), "{ir}"); + assert!(ir.contains("load i32, ptr @__aelys_global_c"), "{ir}"); +} + +#[test] +fn lowering_keeps_const_initializer_for_simple_enum_globals_after_optimizer() { + let air = lower_optimized_full( + r#" +enum Color { + Red, + Green, +} + +let c: Color = Color::Green + +fn read_color() -> i64 { + return match c { + Color::Red => 1 + Color::Green => 2 + } +} +"#, + ); + + let global = air + .globals + .iter() + .find(|global| global.name == "c") + .expect("global c should exist"); + assert!(matches!( + global.init, + Some(aelys_air::AirConst::Int(1, aelys_air::AirIntSize::I32)) + )); +} + +#[test] +fn driver_compiles_simple_enum_global_initializer() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +enum Color { + Red, + Green, +} + +let c: Color = Color::Green + +fn read_color() -> i64 { + return match c { + Color::Red => 1 + Color::Green => 2 + } +} +"#, + ) + .expect("source should be written"); + + compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect("driver should compile simple enum globals"); + + let ir_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(ir_path).expect("llvm ir should be emitted"); + assert!(ir.contains("define fastcc"), "{ir}"); + assert!(ir.contains("ret i64 2"), "{ir}"); +} + +#[test] +fn llvm_lowers_data_enum_unit_global_to_const_aggregate() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::None + +fn read_option() -> i64 { + return match g { + Option::Some(v) => v + Option::None => 33 + } +} +"#, + ); + assert!( + ir.contains("@__aelys_global_g = internal global %__aelys_enum___mono_Option_i64 { i32 1"), + "{ir}" + ); + assert!(ir.contains("zeroinitializer"), "{ir}"); 
+} + +#[test] +fn llvm_lowers_data_enum_payload_global_to_const_aggregate() { + let ir = compile_source_to_verified_ir_without_link( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::Some(42) + +fn read_option() -> i64 { + return match g { + Option::Some(v) => v + Option::None => 0 + } +} +"#, + ); + assert!( + ir.contains("@__aelys_global_g = internal global %__aelys_enum___mono_Option_i64 { i32 0"), + "{ir}" + ); + assert!( + ir.contains("[8 x i8] c\"*\\00\\00\\00\\00\\00\\00\\00\"") + || ir.contains("[8 x i8] [i8 42, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0]"), + "{ir}" + ); +} + +#[test] +fn driver_compiles_data_enum_unit_global_initializer() { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write( + &source_path, + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::None + +fn read_option() -> i64 { + return match g { + Option::Some(v) => v + Option::None => 33 + } +} +"#, + ) + .expect("source should be written"); + + compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) + .expect("driver should compile unit data-enum globals"); + + let ir_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(ir_path).expect("llvm ir should be emitted"); + assert!(ir.contains("ret i64 33"), "{ir}"); +} + +#[test] +fn lowering_keeps_const_initializer_for_payload_enum_globals_after_optimizer() { + let air = lower_optimized_full( + r#" +enum Option { + Some(T), + None, +} + +let g: Option = Option::Some(7) + +fn read_option() -> i64 { + return match g { + Option::Some(v) => v + Option::None => 0 + } +} + +"#, + ); + + let global = air + .globals + .iter() + .find(|global| global.name == "g") + .expect("global g should exist"); + assert!(matches!( + global.init, + Some(aelys_air::AirConst::Enum { ref enum_name, tag: 0, ref payload }) + if enum_name == "__mono_Option_i64" + && matches!( + payload.as_slice(), + [aelys_air::AirConst::Int(7, 
aelys_air::AirIntSize::I64)] + ) + )); +} diff --git a/aelys/tests/llvm_sret_tests.rs b/aelys/tests/llvm_sret_tests.rs new file mode 100644 index 0000000..aab342b --- /dev/null +++ b/aelys/tests/llvm_sret_tests.rs @@ -0,0 +1,670 @@ +use aelys_air::{ + AirBlock, AirConst, AirFunction, AirLocal, AirProgram, AirStmt, AirStmtKind, AirTerminator, + AirType, BlockId, Callee, CallingConv, FunctionAttribs, FunctionId, GcMode, InlineHint, + LocalId, Operand, Place, Rvalue, +}; +use aelys_codegen::CodegenContext; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::fs; +use tempfile::tempdir; + +fn compile_air_to_verified_ir(program: &AirProgram) -> String { + let dir = tempdir().expect("tempdir"); + let ll_path = dir.path().join("module.ll"); + let ll_str = ll_path.to_string_lossy().to_string(); + + let mut codegen = CodegenContext::new("sret_test"); + codegen.compile(program).expect("codegen should succeed"); + codegen.emit_ir(&ll_str).expect("emit_ir should succeed"); + + let ir = fs::read_to_string(&ll_path).expect("ir file should exist"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("ir should parse"); + module.verify().expect("module should verify"); + + ir +} + +fn default_attribs() -> FunctionAttribs { + FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + } +} + +#[test] +fn extern_c_struct_return_compiles_and_verifies() { + let program = AirProgram { + functions: vec![ + // extern "C" fn get_name() -> Str + AirFunction { + id: FunctionId(0), + name: "get_name".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Str, + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }, + // fn caller() -> i64 { let s = 
get_name(); s.len } + AirFunction { + id: FunctionId(1), + name: "caller".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::Str, + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::I64, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Call { + func: Callee::Extern("get_name".to_string(), CallingConv::C), + args: vec![], + }, + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::FieldAccess { + base: Operand::Copy(LocalId(0)), + field: "len".to_string(), + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(1)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }, + ], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + if cfg!(target_os = "windows") { + // declaration should be void with sret ptr as first param + assert!( + ir.contains("declare void @get_name(ptr"), + "sret extern should be declared as void(ptr sret(...)): {ir}" + ); + assert!( + ir.contains("sret"), + "sret attribute should be present on Windows: {ir}" + ); + // should not return %__aelys_string directly + assert!( + !ir.contains("declare %__aelys_string @get_name()"), + "sret extern should not return struct directly on Windows: {ir}" + ); + } else { + // on non-Windows, struct is returned directly + assert!( + ir.contains("declare %__aelys_string @get_name()"), + "non-sret extern should return struct directly: {ir}" + ); + } +} + +/// An Aelys function 
with C calling convention returning a struct should use sret on Windows: the return is stored via the sret pointer and the function returns void at the LLVM level. +#[test] +fn c_convention_aelys_fn_returning_struct_uses_sret() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "make_greeting".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Str, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::Str, + name: None, + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Use(Operand::Const(AirConst::Str("hello".to_string()))), + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(0)))), + }], + is_extern: false, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + if cfg!(target_os = "windows") { + // definition should be void with sret param + assert!( + ir.contains("define void @make_greeting(ptr"), + "sret function should be defined as void(ptr sret(...)): {ir}" + ); + assert!( + ir.contains("sret"), + "sret attribute should be present on Windows: {ir}" + ); + // the return should be `ret void`, not `ret %__aelys_string ...` + assert!( + ir.contains("ret void"), + "sret function should return void: {ir}" + ); + } else { + assert!( + ir.contains("define %__aelys_string @make_greeting()"), + "non-sret function should return struct directly: {ir}" + ); + } +} + +/// fastcc (Aelys-internal) functions returning structs should not use sret +/// LLVM handles the ABI internally for fastcc within the same module. 
+#[test] +fn fastcc_struct_return_does_not_use_sret() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "internal_fn".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Str, + locals: vec![AirLocal { + id: LocalId(0), + ty: AirType::Str, + name: None, + is_mut: false, + span: None, + }], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Use(Operand::Const(AirConst::Str("hello".to_string()))), + }, + span: None, + }], + terminator: AirTerminator::Return(Some(Operand::Copy(LocalId(0)))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + let ir = compile_air_to_verified_ir(&program); + + // fastcc should return struct directly, never sret. + // Aelys-convention functions have an implicit env ptr at param 0. 
+ let fn_decl = ir + .lines() + .find(|l| l.contains("define fastcc %__aelys_string @internal_fn")) + .expect("internal_fn must be defined"); + assert!( + fn_decl.contains("%__aelys_string"), + "fastcc function should return struct directly: {fn_decl}" + ); + // sret should not appear anywhere for internal functions + let sret_on_internal = ir + .lines() + .any(|l| l.contains("internal_fn") && l.contains("sret")); + assert!(!sret_on_internal, "fastcc function must not use sret: {ir}"); +} + +#[test] +fn extern_c_data_enum_return_uses_sret() { + let program = AirProgram { + functions: vec![ + AirFunction { + id: FunctionId(0), + name: "get_opt".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Enum("Opt".to_string()), + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }, + AirFunction { + id: FunctionId(1), + name: "caller".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::Enum("Opt".to_string()), + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::I32, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Call { + func: Callee::Extern("get_opt".to_string(), CallingConv::C), + args: vec![], + }, + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::EnumTag { + enum_name: "Opt".to_string(), + operand: Operand::Copy(LocalId(0)), + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Const(AirConst::Int( + 0, + aelys_air::AirIntSize::I64, + )))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: 
default_attribs(), + span: None, + }, + ], + structs: vec![], + enums: vec![aelys_air::AirEnumDef { + name: "Opt".to_string(), + type_params: vec![], + variants: vec![ + aelys_air::AirEnumVariant { + name: "Some".to_string(), + payload: vec![AirType::I64], + tag: 0, + }, + aelys_air::AirEnumVariant { + name: "None".to_string(), + payload: vec![], + tag: 1, + }, + ], + span: None, + }], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::from([( + "Opt".to_string(), + aelys_air::layout::TypeLayout { size: 16, align: 8 }, + )]), + }; + + let ir = compile_air_to_verified_ir(&program); + + if cfg!(target_os = "windows") { + assert!( + ir.contains("declare void @get_opt(ptr"), + "data enum extern should use sret on Windows: {ir}" + ); + assert!( + ir.contains("sret"), + "data enum extern should carry sret attribute on Windows: {ir}" + ); + assert!( + !ir.contains("declare %__aelys_enum_Opt @get_opt()"), + "data enum extern must not return aggregate directly on Windows: {ir}" + ); + } +} + +#[test] +fn extern_c_data_enum_param_is_rejected() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "consume_opt".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![aelys_air::AirParam { + id: LocalId(0), + ty: AirType::Enum("Opt".to_string()), + name: "opt".to_string(), + span: None, + }], + ret_ty: AirType::Void, + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![aelys_air::AirEnumDef { + name: "Opt".to_string(), + type_params: vec![], + variants: vec![ + aelys_air::AirEnumVariant { + name: "Some".to_string(), + payload: vec![AirType::I64], + tag: 0, + }, + aelys_air::AirEnumVariant { + name: "None".to_string(), + payload: vec![], + tag: 1, + }, + ], + span: None, + }], + globals: vec![], + source_files: vec![], + mono_instances: 
vec![], + struct_sizes: std::collections::HashMap::from([( + "Opt".to_string(), + aelys_air::layout::TypeLayout { size: 16, align: 8 }, + )]), + }; + + let mut codegen = CodegenContext::new("enum_param_reject"); + let err = codegen + .compile(&program) + .expect_err("extern C data enum param should be rejected"); + let rendered = err.to_string(); + assert!( + rendered.contains("enum parameter"), + "unexpected error for extern C data enum param: {rendered}" + ); +} + +#[test] +fn indirect_c_fnptr_data_enum_return_uses_sret() { + let program = AirProgram { + functions: vec![ + AirFunction { + id: FunctionId(0), + name: "get_opt".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Enum("Opt".to_string()), + locals: vec![], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }, + AirFunction { + id: FunctionId(1), + name: "caller".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::I64, + locals: vec![ + AirLocal { + id: LocalId(0), + ty: AirType::FnPtr { + params: vec![], + ret: Box::new(AirType::Enum("Opt".to_string())), + conv: CallingConv::C, + }, + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(1), + ty: AirType::Enum("Opt".to_string()), + name: None, + is_mut: false, + span: None, + }, + AirLocal { + id: LocalId(2), + ty: AirType::I32, + name: None, + is_mut: false, + span: None, + }, + ], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![ + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(0)), + rvalue: Rvalue::Use(Operand::Const(AirConst::FnRef( + "get_opt".to_string(), + ))), + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: Place::Local(LocalId(1)), + rvalue: Rvalue::Call { + func: Callee::FnPtr(LocalId(0)), + args: vec![], + }, + }, + span: None, + }, + AirStmt { + kind: AirStmtKind::Assign { + place: 
Place::Local(LocalId(2)), + rvalue: Rvalue::EnumTag { + enum_name: "Opt".to_string(), + operand: Operand::Copy(LocalId(1)), + }, + }, + span: None, + }, + ], + terminator: AirTerminator::Return(Some(Operand::Const(AirConst::Int( + 0, + aelys_air::AirIntSize::I64, + )))), + }], + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: default_attribs(), + span: None, + }, + ], + structs: vec![], + enums: vec![aelys_air::AirEnumDef { + name: "Opt".to_string(), + type_params: vec![], + variants: vec![ + aelys_air::AirEnumVariant { + name: "Some".to_string(), + payload: vec![AirType::I64], + tag: 0, + }, + aelys_air::AirEnumVariant { + name: "None".to_string(), + payload: vec![], + tag: 1, + }, + ], + span: None, + }], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::from([( + "Opt".to_string(), + aelys_air::layout::TypeLayout { size: 16, align: 8 }, + )]), + }; + + let ir = compile_air_to_verified_ir(&program); + + if cfg!(target_os = "windows") { + let indirect_call_line = ir + .lines() + .find(|line| line.contains("call") && line.contains("sret_slot")) + .expect("expected indirect call using the hidden sret slot"); + assert!( + indirect_call_line.contains("call void @get_opt(") + || indirect_call_line.contains("call void %"), + "indirect c fnptr should lower through a call instruction: {indirect_call_line}\n{ir}" + ); + assert!( + indirect_call_line.contains("sret("), + "indirect c fnptr callsite must carry the sret attribute: {indirect_call_line}\n{ir}" + ); + assert!(ir.contains("sret_slot"), "{ir}"); + assert!( + !ir.contains("call %__aelys_enum_Opt @get_opt()"), + "indirect c fnptr must not return the aggregate directly on Windows: {ir}" + ); + } +} + +#[test] +fn c_convention_defined_data_enum_param_is_rejected() { + let program = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "consume_opt".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + 
params: vec![aelys_air::AirParam { + id: LocalId(0), + ty: AirType::Enum("Opt".to_string()), + name: "opt".to_string(), + span: None, + }], + ret_ty: AirType::Void, + locals: vec![], + blocks: vec![AirBlock { + id: BlockId(0), + stmts: vec![], + terminator: AirTerminator::Return(None), + }], + is_extern: false, + calling_conv: CallingConv::C, + attributes: default_attribs(), + span: None, + }], + structs: vec![], + enums: vec![aelys_air::AirEnumDef { + name: "Opt".to_string(), + type_params: vec![], + variants: vec![ + aelys_air::AirEnumVariant { + name: "Some".to_string(), + payload: vec![AirType::I64], + tag: 0, + }, + aelys_air::AirEnumVariant { + name: "None".to_string(), + payload: vec![], + tag: 1, + }, + ], + span: None, + }], + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::from([( + "Opt".to_string(), + aelys_air::layout::TypeLayout { size: 16, align: 8 }, + )]), + }; + + let mut codegen = CodegenContext::new("c_param_reject"); + let err = codegen + .compile(&program) + .expect_err("C-convention data enum param should be rejected"); + let rendered = err.to_string(); + assert!( + rendered.contains("enum parameter"), + "unexpected error for C-convention data enum param: {rendered}" + ); +} diff --git a/aelys/tests/llvm_string_abi_hardening_tests.rs b/aelys/tests/llvm_string_abi_hardening_tests.rs new file mode 100644 index 0000000..95f07f1 --- /dev/null +++ b/aelys/tests/llvm_string_abi_hardening_tests.rs @@ -0,0 +1,133 @@ +use aelys_driver::compile_file_with_llvm; +use aelys_opt::OptimizationLevel; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::fs; +use tempfile::tempdir; + +fn compile_source_to_verified_ir_with_opt(source: &str, opt: OptimizationLevel) -> String { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write(&source_path, source).expect("source should be written"); + + 
compile_file_with_llvm(&source_path, opt, true) + .expect("llvm backend compilation should succeed"); + + let ll_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + ir +} + +fn compile_source_to_verified_ir(source: &str) -> String { + compile_source_to_verified_ir_with_opt(source, OptimizationLevel::Standard) +} + +fn compile_source_expect_error(source: &str) -> String { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write(&source_path, source).expect("source should be written"); + + match compile_file_with_llvm(&source_path, OptimizationLevel::Standard, true) { + Ok(()) => panic!("expected llvm compilation to fail"), + Err(err) => err.to_string(), + } +} + +// TODO: remove when stdlib bootstrap +#[test] +fn llvm_rejects_user_defined_reserved_bootstrap_println() { + let error = compile_source_expect_error( + r#" +fn println(s: string) { + return +} +"#, + ); + // either "reserved builtin" (from AIR lowering) or "duplicate function definition" from sema, since println is registered as a bootstrap builtin + assert!( + (error.contains("reserved builtin during bootstrap") + || error.contains("duplicate function definition")) + && error.contains("println"), + "{error}" + ); +} + +#[test] +fn llvm_println_bootstrap_uses_write_ptr_len_only() { + let ir = compile_source_to_verified_ir( + r#" +fn greet() { + println("Hello") +} +"#, + ); + + assert!(ir.contains("declare void @__aelys_write(ptr, i64)"), "{ir}"); + assert!(!ir.contains("declare i64 @println("), "{ir}"); + assert!(!ir.contains("declare void @println("), 
"{ir}"); + assert!(!ir.contains("declare i64 @print("), "{ir}"); + assert!(!ir.contains("declare void @print("), "{ir}"); + assert!(!ir.contains("@println("), "{ir}"); + assert!(!ir.contains("@print("), "{ir}"); + assert!(ir.contains("c\"Hello\\00\""), "{ir}"); + assert!(!ir.contains("c\"\\00Hello"), "{ir}"); + assert!(!ir.contains(", i64 0, i64 1"), "{ir}"); + assert!(ir.contains("call void @__aelys_write"), "{ir}"); + assert!(ir.contains("i64 5"), "{ir}"); +} + +#[test] +fn llvm_user_str_param_is_passed_as_aelys_string_struct() { + let ir = compile_source_to_verified_ir_with_opt( + r#" +fn sink(s: string) -> i64 { + print(s) + return 0 +} + +fn caller() -> i64 { + return sink("Hello") +} +"#, + OptimizationLevel::None, + ); + + // Aelys-convention functions prepend an implicit env ptr; the string param follows. + let sink_decl = ir + .lines() + .find(|l| l.contains("define fastcc i64 @sink")) + .expect("sink must be defined"); + assert!( + sink_decl.contains("%__aelys_string"), + "string param must be %__aelys_string struct:\n{sink_decl}" + ); + assert!( + ir.contains("call fastcc i64 @sink("), + "caller should call sink: {ir}" + ); +} + +#[test] +fn unknown_field_on_str() { + let error = compile_source_expect_error( + r#" +fn main() -> i64 { + let s = "Hello" + return s.foo +} +"#, + ); + assert!( + error.contains("unknown field 'foo' on Str; supported: 'len'"), + "{error}" + ); +} diff --git a/aelys/tests/manifest_tests.rs b/aelys/tests/manifest_tests.rs deleted file mode 100644 index f416709..0000000 --- a/aelys/tests/manifest_tests.rs +++ /dev/null @@ -1,18 +0,0 @@ -use aelys_modules::manifest::Manifest; - -#[test] -fn parse_manifest_modules_and_build_flags() { - let raw = r#" - [module.opengl] - capabilities = ["gpu", "window"] - required_version = ">=0.2.0" - - [build] - bundle_native_modules = true - "#; - - let manifest = Manifest::parse(raw).expect("parse"); - let opengl = manifest.module("opengl").expect("module"); - 
assert!(opengl.capabilities.contains(&"gpu".to_string())); - assert_eq!(manifest.build.bundle_native_modules, Some(true)); -} diff --git a/aelys/tests/manual_heap_tests.rs b/aelys/tests/manual_heap_tests.rs deleted file mode 100644 index 88ead91..0000000 --- a/aelys/tests/manual_heap_tests.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Unit tests for ManualHeap struct. - -use aelys_runtime::Value; -use aelys_runtime::manual_heap::{ManualHeap, ManualHeapError}; - -#[test] -fn test_alloc_basic() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(10, 0).unwrap(); - assert_eq!(heap.load(h, 0).unwrap(), Value::null()); - assert_eq!(heap.size(h).unwrap(), 10); -} - -#[test] -fn test_store_load() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(10, 0).unwrap(); - heap.store(h, 5, Value::int(42)).unwrap(); - assert_eq!(heap.load(h, 5).unwrap().as_int(), Some(42)); -} - -#[test] -fn test_alloc_zero_fails() { - let mut heap = ManualHeap::new(); - assert!(matches!( - heap.alloc(0, 0), - Err(ManualHeapError::InvalidSize) - )); -} - -#[test] -fn test_double_free() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(10, 0).unwrap(); - heap.free(h, 1).unwrap(); - assert!(matches!( - heap.free(h, 2), - Err(ManualHeapError::DoubleFree { .. }) - )); -} - -#[test] -fn test_use_after_free_load() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(10, 0).unwrap(); - heap.free(h, 1).unwrap(); - assert!(matches!( - heap.load(h, 0), - Err(ManualHeapError::UseAfterFree { .. }) - )); -} - -#[test] -fn test_use_after_free_store() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(10, 0).unwrap(); - heap.free(h, 1).unwrap(); - assert!(matches!( - heap.store(h, 0, Value::int(1)), - Err(ManualHeapError::UseAfterFree { .. 
}) - )); -} - -#[test] -fn test_out_of_bounds() { - let mut heap = ManualHeap::new(); - let h = heap.alloc(5, 0).unwrap(); - assert!(matches!( - heap.load(h, 10), - Err(ManualHeapError::OutOfBounds { - offset: 10, - size: 5 - }) - )); - assert!(matches!( - heap.store(h, 10, Value::int(1)), - Err(ManualHeapError::OutOfBounds { - offset: 10, - size: 5 - }) - )); -} - -#[test] -fn test_invalid_handle() { - let heap = ManualHeap::new(); - assert!(matches!( - heap.load(999, 0), - Err(ManualHeapError::InvalidHandle) - )); -} - -#[test] -fn test_free_reuses_slots() { - let mut heap = ManualHeap::new(); - let h1 = heap.alloc(10, 0).unwrap(); - heap.free(h1, 1).unwrap(); - let h2 = heap.alloc(5, 2).unwrap(); - assert_eq!(h1, h2); // Same slot reused -} diff --git a/aelys/tests/memory_safety_tests.rs b/aelys/tests/memory_safety_tests.rs deleted file mode 100644 index 543a3d2..0000000 --- a/aelys/tests/memory_safety_tests.rs +++ /dev/null @@ -1,176 +0,0 @@ -#[test] -fn test_arc_bytecode_pointer_stability() { - use aelys_runtime::Function; - - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 2; - - func.set_bytecode(vec![ - 0x01_00_00_2A, // LoadI r0, 42 - 0x22_00_00_00, // Print r0 - 0x21_00_00_00, // Return0 - ]); - - // The bytecode is now an Arc<[u32]>, so taking pointers is safe - let bytecode_ptr_1 = func.bytecode.as_ptr(); - - // Clone the function (Arc clone, not deep copy) - let func2 = func.clone(); - let bytecode_ptr_2 = func2.bytecode.as_ptr(); - - // Pointers should be identical (same allocation) - assert_eq!(bytecode_ptr_1, bytecode_ptr_2); -} - -#[test] -fn test_bytecode_finalization() { - use aelys_runtime::Function; - - let mut func = Function::new(Some("test".to_string()), 0); - - // Push raw bytecode during compilation - func.push_raw(0x01_00_00_01); // LoadI r0, 1 - func.push_raw(0x21_00_00_00); // Return0 - - // Before finalization, bytecode Arc should be empty - assert_eq!(func.bytecode.len(), 0); - - // Finalize to 
transfer builder to immutable Arc - func.finalize_bytecode(); - - // After finalization, bytecode should be populated - assert_eq!(func.bytecode.len(), 2); - assert_eq!(func.bytecode[0], 0x01_00_00_01); - assert_eq!(func.bytecode[1], 0x21_00_00_00); -} - -#[test] -fn test_path_traversal_rejection() { - use aelys_driver::modules::ModuleLoader; - use aelys_syntax::Source; - use std::path::PathBuf; - - let source = Source::new("test.aelys", ""); - let entry_path = PathBuf::from("/tmp/test.aelys"); - let loader = ModuleLoader::new(&entry_path, source); - - // Test path traversal attempts - these should all fail - let malicious_paths = vec![ - vec!["..".to_string(), "etc".to_string(), "passwd".to_string()], - vec![".".to_string(), "hidden".to_string()], - vec!["foo/bar".to_string()], // Contains path separator - vec!["foo\\bar".to_string()], // Contains backslash - ]; - - for path in malicious_paths { - let result = loader.resolve_path(&path); - // Should fail with ModuleNotFound (path validation failed) - assert!(result.is_err(), "Path {:?} should be rejected", path); - } -} - -#[test] -fn test_valid_module_paths() { - use aelys_driver::modules::ModuleLoader; - use aelys_syntax::Source; - use std::path::PathBuf; - - let source = Source::new("test.aelys", ""); - let entry_path = PathBuf::from("/tmp/test.aelys"); - let loader = ModuleLoader::new(&entry_path, source); - - // Valid paths (just names without path components) - let valid_paths = vec![ - vec!["utils".to_string()], - vec!["math".to_string(), "helpers".to_string()], - vec!["my_module".to_string()], - ]; - - for path in valid_paths { - let result = loader.resolve_path(&path); - // These may fail because files don't exist, but they should NOT fail - // due to path traversal validation (error will be ModuleNotFound with searched_paths) - if let Err(err) = result { - let err_str = format!("{}", err); - // Should have searched_paths (legitimate module not found), not empty (path traversal) - assert!( - 
err_str.contains("searched in:") || !err_str.contains("[]"), - "Path {:?} should pass validation but got: {}", - path, - err_str - ); - } - } -} - -#[cfg(debug_assertions)] -#[test] -#[should_panic(expected = "type confusion")] -fn test_debug_assertion_int_unchecked_on_float() { - use aelys_runtime::Value; - - let float_val = Value::float(2.72); - let _ = float_val.as_int_unchecked(); -} - -#[cfg(debug_assertions)] -#[test] -#[should_panic(expected = "type confusion")] -fn test_debug_assertion_float_unchecked_on_int() { - use aelys_runtime::Value; - - let int_val = Value::int(42); - let _ = int_val.as_float_unchecked(); -} - -#[test] -fn test_correct_type_conversions() { - use aelys_runtime::Value; - - let int_val = Value::int(42); - let float_val = Value::float(2.72); - - // These should work without panic - assert_eq!(int_val.as_int_unchecked(), 42); - assert!((float_val.as_float_unchecked() - 2.72).abs() < 0.001); -} - -#[test] -fn test_nested_function_finalization() { - use aelys_runtime::Function; - - let mut outer = Function::new(Some("outer".to_string()), 0); - outer.push_raw(0x01_00_00_01); // LoadI r0, 1 - - let mut inner = Function::new(Some("inner".to_string()), 0); - inner.push_raw(0x01_00_00_02); // LoadI r0, 2 - inner.push_raw(0x21_00_00_00); // Return0 - - outer.nested_functions.push(inner); - outer.push_raw(0x21_00_00_00); // Return0 - - outer.finalize_bytecode(); - - // Both should be finalized - assert_eq!(outer.bytecode.len(), 2); - assert_eq!(outer.nested_functions[0].bytecode.len(), 2); -} - -#[test] -fn test_deep_recursion_works() { - use aelys::run; - - let source = r#" -fn countdown(n) { - if n <= 0 { - return 0 - } - countdown(n - 1) -} - -countdown(100) -"#; - - let result = run(source, "test.aelys"); - assert!(result.is_ok(), "Deep recursion should work: {:?}", result); -} diff --git a/aelys/tests/memory_tests.rs b/aelys/tests/memory_tests.rs deleted file mode 100644 index 9260cb2..0000000 --- a/aelys/tests/memory_tests.rs +++ /dev/null 
@@ -1,244 +0,0 @@ -//! Integration tests for manual memory operations. - -mod common; -use common::*; - -#[test] -fn test_alloc_store_load_free() { - let code = r#" -let p = alloc(10) -store(p, 0, 42) -store(p, 9, 100) -let a = load(p, 0) -let b = load(p, 9) -free(p) -a + b -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(142)); // 42 + 100 -} - -#[test] -fn test_double_free_error() { - let code = r#" -let p = alloc(10) -free(p) -free(p) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("double free") || err.to_lowercase().contains("freed"), - "Expected double free error, got: {}", - err - ); -} - -#[test] -fn test_use_after_free_load() { - let code = r#" -let p = alloc(10) -free(p) -load(p, 0) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("free") || err.to_lowercase().contains("freed"), - "Expected use after free error, got: {}", - err - ); -} - -#[test] -fn test_use_after_free_store() { - let code = r#" -let p = alloc(10) -free(p) -store(p, 0, 1) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("free") || err.to_lowercase().contains("freed"), - "Expected use after free error, got: {}", - err - ); -} - -#[test] -fn test_out_of_bounds() { - let code = r#" -let p = alloc(5) -load(p, 10) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("bound") || err.to_lowercase().contains("size"), - "Expected out of bounds error, got: {}", - err - ); -} - -#[test] -fn test_alloc_zero_fails() { - let code = "let p = alloc(0)"; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("size") || err.to_lowercase().contains("allocation"), - "Expected invalid size error, got: {}", - err - ); -} - -#[test] -fn test_negative_offset() { - let code = r#" -let p = alloc(10) -load(p, -1) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("negative") || err.to_lowercase().contains("index"), - "Expected 
negative index error, got: {}", - err - ); -} - -#[test] -fn test_free_null_is_noop() { - let code = r#" -free(null) -42 -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_multiple_allocations() { - let code = r#" -let p1 = alloc(5) -let p2 = alloc(10) -let p3 = alloc(3) -store(p1, 0, 1) -store(p2, 0, 2) -store(p3, 0, 3) -let a = load(p1, 0) -let b = load(p2, 0) -let c = load(p3, 0) -free(p1) -free(p2) -free(p3) -a + b + c -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(6)); // 1 + 2 + 3 -} - -#[test] -fn test_store_different_types() { - let code = r#" -let p = alloc(10) -store(p, 0, 42) -store(p, 1, true) -store(p, 2, null) -store(p, 3, 3.14) -let a = load(p, 0) -let b = load(p, 1) -let c = load(p, 2) -free(p) -a -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_alloc_boundary_last_element() { - let code = r#" -let p = alloc(5) -store(p, 4, 99) -let val = load(p, 4) -free(p) -val -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(99)); -} - -#[test] -fn test_store_out_of_bounds() { - let code = r#" -let p = alloc(5) -store(p, 5, 42) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("bound") || err.to_lowercase().contains("size"), - "Expected out of bounds error, got: {}", - err - ); -} - -#[test] -fn test_negative_handle_free() { - // free(-1) silently ignores negative handles (like C's free(NULL)) - let code = "free(-1)"; - run_aelys_ok(code); // should succeed without error -} - -#[test] -fn test_negative_size_alloc() { - let code = "alloc(-10)"; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("non-negative") - || err.to_lowercase().contains("size") - || err.to_lowercase().contains("allocation"), - "Expected invalid size error, got: {}", - err - ); -} - -#[test] -fn test_manual_memory_in_function() { - let code = r#" -fn test_mem() { - let p = alloc(3) - store(p, 
0, 10) - store(p, 1, 20) - store(p, 2, 30) - let sum = load(p, 0) + load(p, 1) + load(p, 2) - free(p) - return sum -} -test_mem() -"#; - let result = run_aelys(code); - assert_eq!(result.as_int(), Some(60)); // 10 + 20 + 30 -} - -#[test] -fn test_manual_memory_with_loop() { - let code = r#" -let p = alloc(10) -let mut i = 0 -while i < 10 { - store(p, i, i * 2) - i++ -} -let sum = 0 -let mut j = 0 -while j < 10 { - let val = load(p, j) - j++ -} -free(p) -load(p, 0) -"#; - // This should fail because we're loading after free - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("free") || err.to_lowercase().contains("freed"), - "Expected use after free error, got: {}", - err - ); -} diff --git a/aelys/tests/module_tests.rs b/aelys/tests/module_tests.rs deleted file mode 100644 index a981747..0000000 --- a/aelys/tests/module_tests.rs +++ /dev/null @@ -1,797 +0,0 @@ -use std::fs::{self, File}; -use std::io::Write; -use std::path::PathBuf; -use tempfile::TempDir; - -use aelys_driver::run_file; - -/// Helper to create a benchmarks directory with module files -fn create_module_env() -> TempDir { - tempfile::tempdir().expect("Failed to create temp dir") -} - -/// Helper to write a file in the temp directory -fn write_file(dir: &TempDir, path: &str, content: &str) -> PathBuf { - let file_path = dir.path().join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).expect("Failed to create parent directories"); - } - let mut file = File::create(&file_path).expect("Failed to create file"); - write!(file, "{}", content).expect("Failed to write file"); - file_path -} - -// ==Basic Module Import Tests== - -#[test] -fn test_basic_module_import() { - let dir = create_module_env(); - - write_file( - &dir, - "utils.aelys", - r#" -pub fn double(x) { x * 2 } -pub let FACTOR = 10 -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs utils -needs print from std.io -print(utils.double(5)) -utils.FACTOR -"#, - ); - - 
let result = run_file(&main_path).expect("Module import should succeed"); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_module_import_with_alias() { - let dir = create_module_env(); - - write_file( - &dir, - "utilities.aelys", - r#" -pub fn triple(x) { x * 3 } -pub let VALUE = 42 -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs utilities as u -u.triple(7) + u.VALUE -"#, - ); - - let result = run_file(&main_path).expect("Alias import should succeed"); - assert_eq!(result.as_int(), Some(21 + 42)); // 63 -} - -#[test] -fn test_std_module_import_with_alias() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs std.math as m -m.sin(0) -"#, - ); - - let result = run_file(&main_path).expect("Std alias import should succeed"); - assert_eq!(result.as_float(), Some(0.0)); -} - -#[test] -fn test_std_symbol_import_from() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs cos from std.math -cos(0) -"#, - ); - - let result = run_file(&main_path).expect("Std symbol import should succeed"); - assert_eq!(result.as_float(), Some(1.0)); -} - -#[test] -fn test_wildcard_import() { - let dir = create_module_env(); - - write_file( - &dir, - "math.aelys", - r#" -pub fn square(x) { x * x } -pub let PI = 3 -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs math.* -square(5) + PI -"#, - ); - - let result = run_file(&main_path).expect("Wildcard import should succeed"); - assert_eq!(result.as_int(), Some(25 + 3)); // 28 -} - -#[test] -fn test_specific_symbol_import() { - let dir = create_module_env(); - - write_file( - &dir, - "funcs.aelys", - r#" -pub fn add(a, b) { a + b } -pub fn sub(a, b) { a - b } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs funcs.add -add(10, 5) -"#, - ); - - let result = run_file(&main_path).expect("Symbol import should succeed"); - assert_eq!(result.as_int(), 
Some(15)); -} - -// ==Nested Module Tests== - -#[test] -fn test_nested_module_path() { - let dir = create_module_env(); - - write_file( - &dir, - "helpers/math.aelys", - r#" -pub fn cube(x) { x * x * x } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs helpers.math -math.cube(3) -"#, - ); - - let result = run_file(&main_path).expect("Nested module should succeed"); - assert_eq!(result.as_int(), Some(27)); -} - -#[test] -fn test_nested_module_with_mod_aelys() { - let dir = create_module_env(); - - write_file( - &dir, - "utils/mod.aelys", - r#" -pub fn helper() { 100 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs utils -utils.helper() -"#, - ); - - let result = run_file(&main_path).expect("mod.aelys module should succeed"); - assert_eq!(result.as_int(), Some(100)); -} - -// ==Visibility Tests== - -#[test] -fn test_private_function_not_exported() { - let dir = create_module_env(); - - write_file( - &dir, - "private_mod.aelys", - r#" -fn secret() { 42 } -pub fn public() { secret() } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs private_mod -private_mod.public() -"#, - ); - - // Public function that calls private should work - let result = run_file(&main_path).expect("Public function should succeed"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_private_function_returns_null() { - let dir = create_module_env(); - - write_file( - &dir, - "private_mod.aelys", - r#" -fn secret() { 42 } -pub fn public() { 1 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs private_mod -private_mod.secret -"#, - ); - - // Private function access returns null (not exported) - let result = run_file(&main_path).expect("Should succeed but return null"); - assert!(result.is_null()); -} - -#[test] -fn test_private_let_not_exported() { - let dir = create_module_env(); - - write_file( - &dir, - "private_mod.aelys", - r#" -let secret = 100 -pub let public 
= 200 -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs private_mod -private_mod.public -"#, - ); - - let result = run_file(&main_path).expect("Public let should be accessible"); - assert_eq!(result.as_int(), Some(200)); -} - -// ==Circular Dependency Tests== - -#[test] -fn test_circular_dependency_detected() { - let dir = create_module_env(); - - write_file( - &dir, - "a.aelys", - r#" -needs b -pub fn a_func() { 1 } -"#, - ); - - write_file( - &dir, - "b.aelys", - r#" -needs a -pub fn b_func() { 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs a -a.a_func() -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!( - err.contains("circular dependency"), - "Error should mention circular dependency: {}", - err - ); -} - -#[test] -fn test_self_import_detected() { - let dir = create_module_env(); - - write_file( - &dir, - "self_import.aelys", - r#" -needs self_import -pub fn foo() { 1 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs self_import -self_import.foo() -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!( - err.contains("circular dependency"), - "Self-import should be detected: {}", - err - ); -} - -// ==Module Not Found Tests== - -#[test] -fn test_module_not_found() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs nonexistent -nonexistent.foo() -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!( - err.contains("module not found") || err.contains("not found"), - "Error should mention module not found: {}", - err - ); -} - -#[test] -fn test_symbol_not_found_in_module() { - let dir = create_module_env(); - - write_file( - &dir, - "utils.aelys", - r#" -pub fn existing() { 1 } -"#, - 
); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs utils.nonexistent -nonexistent() -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!( - err.contains("not found") || err.contains("nonexistent"), - "Error should mention symbol not found: {}", - err - ); -} - -// ==Multiple Imports Tests== - -#[test] -fn test_multiple_module_imports() { - let dir = create_module_env(); - - write_file( - &dir, - "mod_a.aelys", - r#" -pub fn a() { 10 } -"#, - ); - - write_file( - &dir, - "mod_b.aelys", - r#" -pub fn b() { 20 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mod_a -needs mod_b -mod_a.a() + mod_b.b() -"#, - ); - - let result = run_file(&main_path).expect("Multiple imports should succeed"); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_module_import_with_dependencies() { - let dir = create_module_env(); - - write_file( - &dir, - "base.aelys", - r#" -pub fn base_func() { 5 } -"#, - ); - - write_file( - &dir, - "derived.aelys", - r#" -needs base -pub fn derived_func() { base.base_func() * 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs derived -derived.derived_func() -"#, - ); - - let result = run_file(&main_path).expect("Module with dependencies should succeed"); - assert_eq!(result.as_int(), Some(10)); -} - -// ==Edge Cases== - -#[test] -fn test_empty_module() { - let dir = create_module_env(); - - write_file( - &dir, - "empty.aelys", - r#" -// Empty module - no exports -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs empty -42 -"#, - ); - - let result = run_file(&main_path).expect("Empty module should be allowed"); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_same_module_imported_twice() { - let dir = create_module_env(); - - write_file( - &dir, - "utils.aelys", - r#" -pub let COUNTER = 1 -"#, - ); - - write_file( - &dir, - "mod_a.aelys", 
- r#" -needs utils -pub fn get_counter() { utils.COUNTER } -"#, - ); - - write_file( - &dir, - "mod_b.aelys", - r#" -needs utils -pub fn get_counter_too() { utils.COUNTER } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mod_a -needs mod_b -mod_a.get_counter() + mod_b.get_counter_too() -"#, - ); - - // Same module should only be loaded once (cached) - let result = run_file(&main_path).expect("Diamond dependency should work"); - assert_eq!(result.as_int(), Some(2)); -} - -// ==Direct Import Tests== - -#[test] -fn test_std_direct_import() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs std.math -sin(0) + cos(0) -"#, - ); - - let result = run_file(&main_path).expect("Direct import should work"); - assert_eq!(result.as_float(), Some(1.0)); -} - -#[test] -fn test_std_direct_and_qualified() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs std.math -sin(0) + math.cos(0) -"#, - ); - - let result = run_file(&main_path).expect("Both forms should work"); - assert_eq!(result.as_float(), Some(1.0)); -} - -#[test] -fn test_alias_with_auto_registered_globals() { - // Auto-registered stdlib functions are always available, even when - // the module is also imported with an alias. Both `sin(0)` and - // `m.sin(0)` should work. 
- let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs std.math as m -sin(0) -"#, - ); - - let result = run_file(&main_path); - assert!( - result.is_ok(), - "sin() should be available from auto-registration" - ); -} - -#[test] -fn test_alias_qualified_works() { - let dir = create_module_env(); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs std.math as m -m.sin(0) + m.cos(0) -"#, - ); - - let result = run_file(&main_path).expect("Aliased qualified access should work"); - assert_eq!(result.as_float(), Some(1.0)); -} - -#[test] -fn test_custom_module_direct_import() { - let dir = create_module_env(); - - write_file( - &dir, - "mymath.aelys", - r#" -pub fn double(x) { x * 2 } -pub fn triple(x) { x * 3 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mymath -double(5) + triple(2) -"#, - ); - - let result = run_file(&main_path).expect("Direct import for custom modules should work"); - assert_eq!(result.as_int(), Some(16)); -} - -#[test] -fn test_custom_module_alias_qualified_works() { - let dir = create_module_env(); - - write_file( - &dir, - "mymath.aelys", - r#" -pub fn double(x) { x * 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mymath as mm -mm.double(5) -"#, - ); - - let result = run_file(&main_path).expect("Aliased qualified access should work"); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_custom_module_alias_no_direct() { - let dir = create_module_env(); - - write_file( - &dir, - "mymath.aelys", - r#" -pub fn double(x) { x * 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mymath as mm -double(5) -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); -} - -#[test] -fn test_symbol_conflict_error() { - let dir = create_module_env(); - - write_file( - &dir, - "mod_a.aelys", - r#" -pub fn shared() { 1 } -"#, - ); - - write_file( - &dir, - "mod_b.aelys", - 
r#" -pub fn shared() { 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mod_a -needs mod_b -shared() -"#, - ); - - let result = run_file(&main_path); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!( - err.contains("conflict") || err.contains("shared"), - "Should detect symbol conflict: {}", - err - ); -} - -#[test] -fn test_no_conflict_with_both_aliased() { - let dir = create_module_env(); - - write_file( - &dir, - "mod_a.aelys", - r#" -pub fn shared() { 1 } -"#, - ); - - write_file( - &dir, - "mod_b.aelys", - r#" -pub fn shared() { 2 } -"#, - ); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs mod_a as a -needs mod_b as b -a.shared() + b.shared() -"#, - ); - - let result = run_file(&main_path).expect("Both aliased should work"); - assert_eq!(result.as_int(), Some(3)); -} diff --git a/aelys/tests/modules/basic/debug.aelys b/aelys/tests/modules/basic/debug.aelys deleted file mode 100644 index 200032d..0000000 --- a/aelys/tests/modules/basic/debug.aelys +++ /dev/null @@ -1,6 +0,0 @@ -needs utils - -print(type(utils.double)) -print(type(utils.FACTOR)) -print(utils.FACTOR) -print(utils.double(5)) diff --git a/aelys/tests/modules/basic/main.aelys b/aelys/tests/modules/basic/main.aelys deleted file mode 100644 index 595c291..0000000 --- a/aelys/tests/modules/basic/main.aelys +++ /dev/null @@ -1,5 +0,0 @@ -// main.aelys - Basic module import test -needs utils - -print(utils.double(5)) // Should print 10 -print(utils.FACTOR) // Should print 10 diff --git a/aelys/tests/modules/basic/utils.aelys b/aelys/tests/modules/basic/utils.aelys deleted file mode 100644 index cda05ce..0000000 --- a/aelys/tests/modules/basic/utils.aelys +++ /dev/null @@ -1,6 +0,0 @@ -// utils.aelys - Basic module with public and private symbols -pub fn double(x) { x * 2 } -pub let FACTOR = 10 - -fn private_helper() { 42 } -let private_state = 0 diff --git a/aelys/tests/modules/circular/a.aelys 
b/aelys/tests/modules/circular/a.aelys deleted file mode 100644 index 5dc9168..0000000 --- a/aelys/tests/modules/circular/a.aelys +++ /dev/null @@ -1,3 +0,0 @@ -// a.aelys - Creates circular dependency with b.aelys -needs b -pub fn from_a() { 1 } diff --git a/aelys/tests/modules/circular/b.aelys b/aelys/tests/modules/circular/b.aelys deleted file mode 100644 index 32e1664..0000000 --- a/aelys/tests/modules/circular/b.aelys +++ /dev/null @@ -1,3 +0,0 @@ -// b.aelys - Creates circular dependency with a.aelys -needs a -pub fn from_b() { 2 } diff --git a/aelys/tests/modules/errors/not_found.aelys b/aelys/tests/modules/errors/not_found.aelys deleted file mode 100644 index ff53812..0000000 --- a/aelys/tests/modules/errors/not_found.aelys +++ /dev/null @@ -1,2 +0,0 @@ -// not_found.aelys - Should error for nonexistent module -needs nonexistent diff --git a/aelys/tests/modules/errors/private_access.aelys b/aelys/tests/modules/errors/private_access.aelys deleted file mode 100644 index 6c28d71..0000000 --- a/aelys/tests/modules/errors/private_access.aelys +++ /dev/null @@ -1,4 +0,0 @@ -// private_access.aelys - Should error when accessing private symbol -needs utils - -utils.private() // Error: 'private' is not public in 'utils' diff --git a/aelys/tests/modules/errors/std_not_available.aelys b/aelys/tests/modules/errors/std_not_available.aelys deleted file mode 100644 index ef471ae..0000000 --- a/aelys/tests/modules/errors/std_not_available.aelys +++ /dev/null @@ -1,2 +0,0 @@ -// std_not_available.aelys - Should error for stdlib module -needs std.math diff --git a/aelys/tests/modules/errors/utils.aelys b/aelys/tests/modules/errors/utils.aelys deleted file mode 100644 index efdfdf7..0000000 --- a/aelys/tests/modules/errors/utils.aelys +++ /dev/null @@ -1,3 +0,0 @@ -// utils.aelys - Module with private function for testing -fn private() { 42 } -pub fn public() { 1 } diff --git a/aelys/tests/modules/mod_index/main.aelys b/aelys/tests/modules/mod_index/main.aelys deleted file 
mode 100644 index fc77d4b..0000000 --- a/aelys/tests/modules/mod_index/main.aelys +++ /dev/null @@ -1,4 +0,0 @@ -// main.aelys - Module index (mod.aelys) test -needs utils - -print(utils.utility()) // Should print "from mod.aelys" diff --git a/aelys/tests/modules/mod_index/utils/mod.aelys b/aelys/tests/modules/mod_index/utils/mod.aelys deleted file mode 100644 index 3867137..0000000 --- a/aelys/tests/modules/mod_index/utils/mod.aelys +++ /dev/null @@ -1,2 +0,0 @@ -// utils/mod.aelys - Module index file -pub fn utility() { "from mod.aelys" } diff --git a/aelys/tests/modules/nested/main.aelys b/aelys/tests/modules/nested/main.aelys deleted file mode 100644 index 494fd2c..0000000 --- a/aelys/tests/modules/nested/main.aelys +++ /dev/null @@ -1,6 +0,0 @@ -// main.aelys - Nested module imports test -needs utils.math -needs utils.string as str - -print(math.square(4)) // Should print 16 -print(str.repeat("ab", 3)) // Should print ababab diff --git a/aelys/tests/modules/nested/utils/math.aelys b/aelys/tests/modules/nested/utils/math.aelys deleted file mode 100644 index e1123fb..0000000 --- a/aelys/tests/modules/nested/utils/math.aelys +++ /dev/null @@ -1,3 +0,0 @@ -// utils/math.aelys - Math utility module -pub fn square(x) { x * x } -pub fn cube(x) { x * x * x } diff --git a/aelys/tests/modules/nested/utils/string.aelys b/aelys/tests/modules/nested/utils/string.aelys deleted file mode 100644 index b3ba245..0000000 --- a/aelys/tests/modules/nested/utils/string.aelys +++ /dev/null @@ -1,10 +0,0 @@ -// utils/string.aelys - String utility module -pub fn repeat(s, n) { - let mut result = "" - let mut i = 0 - while i < n { - result = result + s - i = i + 1 - } - result -} diff --git a/aelys/tests/narrowing_stress_tests.rs b/aelys/tests/narrowing_stress_tests.rs new file mode 100644 index 0000000..2ad5d6e --- /dev/null +++ b/aelys/tests/narrowing_stress_tests.rs @@ -0,0 +1,623 @@ +/// Stress tests for literal narrowing edge cases. 
+/// These probe boundaries the happy-path tests never touch. +use aelys_driver::compile_file_with_llvm; +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_opt::OptimizationLevel; +use aelys_sema::TypeInference; +use aelys_syntax::Source; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use std::fs; +use tempfile::tempdir; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn compile_to_verified_ir(source: &str) -> String { + let dir = tempdir().expect("tempdir should be created"); + let source_path = dir.path().join("module.aelys"); + fs::write(&source_path, source).expect("source should be written"); + compile_file_with_llvm(&source_path, OptimizationLevel::None, true) + .expect("llvm backend compilation should succeed"); + let ll_path = source_path.with_extension("ll"); + let ir = fs::read_to_string(&ll_path).expect("llvm ir file should be generated"); + let context = Context::create(); + let buffer = MemoryBuffer::create_from_file(&ll_path).expect("llvm ir should be readable"); + let module = context + .create_module_from_ir(buffer) + .expect("llvm ir should parse into a module"); + module + .verify() + .expect("module.verify() should succeed for generated ir"); + ir +} + +#[test] +fn return_i32_literal_zero() { + assert!( + sema_ok("fn f() -> i32 { return 0 }"), + "return 0 in i32 fn should pass sema" + ); +} + +#[test] +fn return_i32_literal_positive() { + assert!( + sema_ok("fn f() -> i32 { return 42 }"), + "return 42 in i32 fn should pass sema" + ); +} + +#[test] +fn return_i32_literal_max() { + assert!( + sema_ok("fn f() -> i32 { return 2147483647 }"), + "return i32::MAX should pass sema" + ); +} + +#[test] +fn return_i32_literal_overflow() { + assert!( + 
!sema_ok("fn f() -> i32 { return 2147483648 }"), + "return i32::MAX+1 should FAIL sema" + ); +} + +#[test] +fn return_i8_literal() { + assert!( + sema_ok("fn f() -> i8 { return 127 }"), + "return 127 in i8 fn should pass sema" + ); +} + +#[test] +fn return_i8_literal_overflow() { + assert!( + !sema_ok("fn f() -> i8 { return 128 }"), + "return 128 in i8 fn should FAIL sema" + ); +} + +#[test] +fn return_u8_literal() { + assert!( + sema_ok("fn f() -> u8 { return 255 }"), + "return 255 in u8 fn should pass sema" + ); +} + +#[test] +fn return_u8_literal_overflow() { + assert!( + !sema_ok("fn f() -> u8 { return 256 }"), + "return 256 in u8 fn should FAIL sema" + ); +} + +#[test] +fn return_negative_i32() { + // -1 is parsed as Unary(Neg, Int(1)) not as Int(-1). try_narrow_literal only matches TypedExprKind::Int so this will not be narrowed, producing Mismatch{I64, I32}. + let ok = sema_ok("fn f() -> i32 { return -1 }"); + assert!( + ok, + "return -1 in i32 fn should pass sema (narrowing bug if this fails)" + ); +} + +#[test] +fn return_negative_i8() { + let ok = sema_ok("fn f() -> i8 { return -1 }"); + assert!( + ok, + "return -1 in i8 fn should pass sema (narrowing BUG if this fails)" + ); +} + +#[test] +fn return_binop_i32() { + // 1 + 2 -> both default to I64, result is I64. Return type is I32. + // no narrowing for BinOp results + let ok = sema_ok("fn f() -> i32 { return 1 + 2 }"); + assert!( + ok, + "return 1+2 in i32 fn should pass sema (binop narrowing BUG if this fails)" + ); +} + +#[test] +fn return_i32_param_plus_literal() { + // a is i32 param, 1 defaults to i64 -> binop constraint i32 == i64 --> fail ? 
+ let ok = sema_ok("fn f(a: i32) -> i32 { return a + 1 }"); + assert!( + ok, + "i32 param + literal should work (binop litteral bug if this fails)" + ); +} + +#[test] +fn let_i32_annotation_literal() { + let ok = sema_ok("fn f() { let x: i32 = 42 }"); + assert!( + ok, + "let x: i32 = 42 should pass sema (let narrowing bug if this fails)" + ); +} + +#[test] +fn let_i8_annotation_literal() { + let ok = sema_ok("fn f() { let x: i8 = 10 }"); + assert!( + ok, + "let x: i8 = 10 should pass sema (let narrowing bug if this fails)" + ); +} + +#[test] +fn array_i32_annotation_with_literals() { + let ok = sema_ok("fn f() { let arr: [i32; 3] = [1, 2, 3] }"); + assert!(ok, "annotated i32 array with literals should pass sema"); +} + +#[test] +fn if_else_both_return_i32_literal() { + let ok = sema_ok( + r#" +fn f(x: bool) -> i32 { + if x { + return 1 + } + return 0 +} +"#, + ); + assert!(ok, "both branches returning i32 literals should pass sema"); +} + +#[test] +fn if_else_return_i32_from_nested() { + let ok = sema_ok( + r#" +fn f(x: bool, y: bool) -> i32 { + if x { + if y { + return 1 + } + return 2 + } + return 3 +} +"#, + ); + assert!(ok, "nested if returns with i32 literals should pass sema"); +} + +#[test] +fn codegen_i32_return_literal() { + let ir = compile_to_verified_ir("fn f() -> i32 { return 42 }"); + assert!( + ir.contains("ret i32"), + "i32 function should return i32, not i64:\n{ir}" + ); +} + +#[test] +fn codegen_i32_if_else_returns() { + let ir = compile_to_verified_ir( + r#" +fn f(x: i32) -> i32 { + if x > 0 { + return 1 + } + return 0 +} +"#, + ); + // every ret in this function should be i32 + for line in ir.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("ret ") && !trimmed.starts_with("ret void") { + assert!( + trimmed.contains("ret i32"), + "expected ret i32 but got: {trimmed}\nFull IR:\n{ir}" + ); + } + } +} + +#[test] +fn return_f32_literal() { + let ok = sema_ok("fn f() -> f32 { return 1.0 }"); + assert!(ok, "return 1.0 in f32 fn should 
pass sema"); +} + +#[test] +fn return_f32_literal_codegen() { + let ir = compile_to_verified_ir("fn f() -> f32 { return 1.0 }"); + assert!( + ir.contains("ret float"), + "f32 function should return float:\n{ir}" + ); +} + +#[test] +fn assign_i32_literal() { + // let x: i32 = 0; x = 42 assign.rs does not call try_narrow_literal + let ok = sema_ok( + r#" +fn f() { + let mut x: i32 = 0 + x = 42 +} +"#, + ); + assert!( + ok, + "x = 42 where x: i32 should pass sema (ASSIGNMENT NARROWING BUG if fails)" + ); +} + +#[test] +fn assign_i8_literal() { + let ok = sema_ok( + r#" +fn f() { + let mut x: i8 = 0 + x = 10 +} +"#, + ); + assert!( + ok, + "x = 10 where x: i8 should pass sema (ASSIGNMENT NARROWING BUG if fails)" + ); +} + +#[test] +fn struct_field_i32_literal() { + // StructLiteral pushes constraint I64 == I32 without narrowing + let ok = sema_ok( + r#" +struct Foo { x: i32 } +fn f() { let a = Foo { x: 42 } } +"#, + ); + assert!(ok, "struct field i32 init with literal should pass sema"); +} + +#[test] +fn struct_field_i8_literal() { + let ok = sema_ok( + r#" +struct Bar { val: i8 } +fn f() { let b = Bar { val: 10 } } +"#, + ); + assert!(ok, "struct field i8 init with literal should pass sema"); +} + +#[test] +fn implicit_return_i32_literal() { + // implicit return constraint in implicit.rs does not narrow + let ok = sema_ok("fn f() -> i32 { 42 }"); + assert!(ok, "implicit return 42 in i32 fn should pass sema"); +} + +#[test] +fn index_assign_i32_array_literal() { + let ok = sema_ok( + r#" +fn f() { + let mut arr: [i32; 3] = [0, 0, 0] + arr[0] = 42 +} +"#, + ); + assert!(ok, "index assign to i32 array should pass sema"); +} + +#[test] +fn return_binop_chain_i32() { + let ok = sema_ok("fn f() -> i32 { return 1 + 2 + 3 }"); + assert!( + ok, + "return 1+2+3 in i32 fn should pass sema (binop chain bug if fails)" + ); +} + +#[test] +fn return_i32_param_binop_literal_codegen() { + // this should work because narrow_binop_int_literals narrows the literal to match the param 
type + let ir = compile_to_verified_ir( + r#" +fn add_one(x: i32) -> i32 { + return x + 1 +} +"#, + ); + assert!(ir.contains("add i32"), "should add i32 not i64:\n{ir}"); +} + +#[test] +fn while_loop_i32_counter() { + let ok = sema_ok( + r#" +fn f() -> i32 { + let mut i: i32 = 0 + while i < 10 { + i = i + 1 + } + return i +} +"#, + ); + assert!(ok, "while loop with i32 counter should pass sema"); +} + +#[test] +fn comparison_i32_with_literal() { + // x > 0 where x: i32 and 0 defaults to i64; narrow_binop_int_literals should handle this + let ok = sema_ok( + r#" +fn f(x: i32) -> bool { + return x > 0 +} +"#, + ); + assert!( + ok, + "i32 > 0 should pass sema (literal should narrow to i32)" + ); +} + +#[test] +fn binary_literal_plus_non_narrowable_should_reject() { + // `1 + x` where x is i64 should not be silently narrowed to i32. + // before, try_narrow_literal returned true for identifiers, causing the binary expression to be incorrectly retyped to i32. + let ok = sema_ok( + r#" +fn f(x: i64) -> i32 { + return 1 + x +} +"#, + ); + assert!( + !ok, + "return (1 + x:i64) as i32 must FAIL sema, non-narrowable operand should block binary narrowing" + ); +} + +#[test] +fn binary_non_narrowable_plus_literal_should_reject() { + // Same as above but with operands reversed + let ok = sema_ok( + r#" +fn f(x: i64) -> i32 { + return x + 1 +} +"#, + ); + assert!( + !ok, + "return (x:i64 + 1) as i32 must FAIL sema, non-narrowable operand should block binary narrowing" + ); +} + +#[test] +fn binary_both_literals_still_narrows() { + // pure literal binary expressions should still narrow successfully + let ok = sema_ok("fn f() -> i32 { return 1 + 2 }"); + assert!(ok, "return (1 + 2) in i32 fn should still pass sema"); +} + +#[test] +fn binary_literal_plus_same_type_param_works() { + // a:i32 + 1 should work because narrow_binop_int_literals narrows 1 to i32, so both operands are i32 before try_narrow_literal is ever called from return context + let ok = sema_ok( + r#" +fn f(a: i32) 
-> i32 { + return a + 1 +} +"#, + ); + assert!( + ok, + "return (a:i32 + 1) as i32 should pass sema, narrow_binop_int_literals handles this" + ); +} + +#[test] +fn unary_non_narrowable_should_not_corrupt() { + // unary on a non-narrowable should not succeed in narrowing -x where x is i64 should not be silently narrowed to i32 + let ok = sema_ok( + r#" +fn negate(x: i64) -> i32 { + return -x +} +"#, + ); + assert!( + !ok, + "return (-x:i64) as i32 must fail sema, non-narrowable in unary should block narrowing" + ); +} + +#[test] +fn binary_add_overflow_i8_must_fail() { + // 100 + 100 = 200, which overflows i8 (-128..127). + let ok = sema_ok("fn f() -> i8 { return 100 + 100 }"); + assert!( + !ok, + "return (100 + 100) as i8 must fail sema, result 200 overflows i8" + ); +} + +#[test] +fn binary_add_within_i8_must_pass() { + // 50 + 50 = 100, which fits in i8 (-128..127). + let ok = sema_ok("fn f() -> i8 { return 50 + 50 }"); + assert!( + ok, + "return (50 + 50) as i8 must pass sema, result 100 fits in i8" + ); +} + +#[test] +fn binary_mul_overflow_i8_must_fail() { + // 100 * 2 = 200, overflows i8. + let ok = sema_ok("fn f() -> i8 { return 100 * 2 }"); + assert!( + !ok, + "return (100 * 2) as i8 must fail sema, result 200 overflows i8" + ); +} + +#[test] +fn binary_sub_overflow_i8_must_fail() { + let ok = sema_ok("fn f() -> i8 { return 127 + 1 }"); + assert!( + !ok, + "return (127 + 1) as i8 must fail sema, result 128 overflows i8" + ); +} + +#[test] +fn binary_sub_within_i8_must_pass() { + let ok = sema_ok("fn f() -> i8 { return 100 - 50 }"); + assert!( + ok, + "return (100 - 50) as i8 must pass sema, result 50 fits in i8" + ); +} + +#[test] +fn binary_add_overflow_i16_must_fail() { + // 30000 + 30000 = 60000, overflows i16 (-32768..32767). 
+ let ok = sema_ok("fn f() -> i16 { return 30000 + 30000 }"); + assert!( + !ok, + "return (30000 + 30000) as i16 must fail sema, result 60000 overflows i16" + ); +} + +#[test] +fn binary_add_within_i16_must_pass() { + let ok = sema_ok("fn f() -> i16 { return 10000 + 10000 }"); + assert!( + ok, + "return (10000 + 10000) as i16 must pass sema, result 20000 fits in i16" + ); +} + +#[test] +fn binary_mul_within_i32_must_pass() { + let ok = sema_ok("fn f() -> i32 { return 1000 * 1000 }"); + assert!( + ok, + "return (1000 * 1000) as i32 must pass sema, result fits in i32" + ); +} + +#[test] +fn struct_field_i32_narrowed_literal_with_constraint() { + // Struct field init with a narrowable literal. Narrowing changes the literals from + // i64 to i32, and the solver must also see the Equal(i32, i32) constraint + // to validate. + let ok = sema_ok( + r#" +struct Pair { a: i32, b: i32 } +fn f() { + let p = Pair { a: 1, b: 2 } +} +"#, + ); + assert!( + ok, + "struct field init with narrowable i32 literals must pass sema" + ); +} + +#[test] +fn struct_field_i8_narrowed_literal_with_constraint() { + // same as above but with i8 to exercise smaller integer narrowing + let ok = sema_ok( + r#" +struct Small { val: i8 } +fn f() { + let s = Small { val: 100 } +} +"#, + ); + assert!( + ok, + "struct field init with narrowable i8 literal must pass sema" + ); +} + +#[test] +fn let_i32_annotation_literal_with_constraint() { + // let with type annotation and literal, narrowing changes 42 from i64 + // to i32, and the solver must see the Equal(i32, i32) constraint too. 
+ // before, the constraint was skipped when narrowing succeeded + let ok = sema_ok( + r#" +fn f() { + let x: i32 = 42 + let y: i32 = x + 1 +} +"#, + ); + assert!(ok, "let x: i32 = 42 followed by use must pass sema"); +} + +#[test] +fn let_i8_annotation_literal_with_constraint() { + let ok = sema_ok( + r#" +fn f() { + let x: i8 = 10 +} +"#, + ); + assert!(ok, "let x: i8 = 10 must pass sema"); +} + +#[test] +fn struct_field_string_where_i32_expected_rejected() { + let ok = sema_ok( + r#" +struct Typed { val: i32 } +fn f() { + let t = Typed { val: "wrong" } +} +"#, + ); + assert!( + !ok, + "struct field with string where i32 expected must fail sema" + ); +} + +#[test] +fn let_string_where_i64_expected_rejected() { + // solver catches the type mismatch that narrowing cannot fix. + let ok = sema_ok( + r#" +fn f() { + let x: i64 = "bad" +} +"#, + ); + assert!( + !ok, + "let x: i64 = \"bad\" must fail sema, solver catches the mismatch" + ); +} diff --git a/aelys/tests/native_dependency_tests.rs b/aelys/tests/native_dependency_tests.rs deleted file mode 100644 index dbb3557..0000000 --- a/aelys/tests/native_dependency_tests.rs +++ /dev/null @@ -1,107 +0,0 @@ -use aelys_driver::run_file; -use std::fs; -use std::path::{Path, PathBuf}; -use std::process::Command; -use tempfile::TempDir; - -fn build_fixture(dir_name: &str, package_name: &str) -> PathBuf { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let crate_dir = manifest_dir.join("tests/fixtures").join(dir_name); - let target_dir = manifest_dir.join("target").join(dir_name); - - let status = Command::new("cargo") - .arg("build") - .arg("--manifest-path") - .arg(crate_dir.join("Cargo.toml")) - .arg("--release") - .env("CARGO_TARGET_DIR", &target_dir) - .status() - .expect("cargo build should run"); - assert!(status.success(), "fixture build failed: {}", dir_name); - - let lib_name = lib_filename(package_name); - let lib_path = target_dir.join("release").join(lib_name); - assert!(lib_path.exists(), "missing 
built library at {:?}", lib_path); - lib_path -} - -fn lib_filename(package_name: &str) -> String { - let base = package_name.replace('-', "_"); - if cfg!(target_os = "linux") { - format!("lib{}.so", base) - } else if cfg!(target_os = "macos") { - format!("lib{}.dylib", base) - } else if cfg!(target_os = "windows") { - format!("{}.dll", base) - } else { - panic!("unsupported target for native dependency tests"); - } -} - -fn module_ext() -> &'static str { - if cfg!(target_os = "windows") { - "dll" - } else if cfg!(target_os = "macos") { - "dylib" - } else { - "so" - } -} - -fn create_module_env() -> TempDir { - tempfile::tempdir().expect("Failed to create temp dir") -} - -fn write_file(dir: &TempDir, path: &str, content: &str) -> PathBuf { - let file_path = dir.path().join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).expect("Failed to create parent directories"); - } - fs::write(&file_path, content).expect("Failed to write file"); - file_path -} - -fn copy_module(dir: &TempDir, module_name: &str, lib_path: &Path) { - let dest = dir.path().join(format!("{}.{}", module_name, module_ext())); - fs::copy(lib_path, dest).expect("copy native module"); -} - -#[test] -fn circular_native_dependency_is_rejected() { - let lib_a = build_fixture("native_cycle_a", "aelys-native-cycle-a"); - let lib_b = build_fixture("native_cycle_b", "aelys-native-cycle-b"); - - let dir = create_module_env(); - copy_module(&dir, "cycle_a", &lib_a); - copy_module(&dir, "cycle_b", &lib_b); - - let main_path = write_file(&dir, "main.aelys", "needs cycle_a\n"); - - let err = run_file(&main_path).expect_err("should fail"); - assert!( - err.to_string().contains("circular dependency"), - "unexpected error: {}", - err - ); -} - -#[test] -fn diamond_dependency_version_conflict_is_rejected() { - let lib_a = build_fixture("native_dep_a", "aelys-native-dep-a"); - let lib_b = build_fixture("native_dep_b", "aelys-native-dep-b"); - let lib_c = build_fixture("native_dep_c", 
"aelys-native-dep-c"); - - let dir = create_module_env(); - copy_module(&dir, "dep_a", &lib_a); - copy_module(&dir, "dep_b", &lib_b); - copy_module(&dir, "dep_c", &lib_c); - - let main_path = write_file(&dir, "main.aelys", "needs dep_a\nneeds dep_c\n"); - - let err = run_file(&main_path).expect_err("should fail"); - assert!( - err.to_string().contains("version conflict"), - "unexpected error: {}", - err - ); -} diff --git a/aelys/tests/native_hot_reload_tests.rs b/aelys/tests/native_hot_reload_tests.rs deleted file mode 100644 index 006df05..0000000 --- a/aelys/tests/native_hot_reload_tests.rs +++ /dev/null @@ -1,130 +0,0 @@ -use aelys_driver::modules::ModuleLoader; -use aelys_runtime::{VM, VmConfig}; -use aelys_syntax::Source; -use aelys_syntax::Span; -use aelys_syntax::{ImportKind, NeedsStmt}; -use std::fs; -use std::path::{Path, PathBuf}; -use std::process::Command; -use tempfile::TempDir; - -fn build_fixture(dir_name: &str, package_name: &str) -> PathBuf { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let crate_dir = manifest_dir.join("tests/fixtures").join(dir_name); - let target_dir = manifest_dir.join("target").join(dir_name); - - let status = Command::new("cargo") - .arg("build") - .arg("--manifest-path") - .arg(crate_dir.join("Cargo.toml")) - .arg("--release") - .env("CARGO_TARGET_DIR", &target_dir) - .status() - .expect("cargo build should run"); - assert!(status.success(), "fixture build failed: {}", dir_name); - - let lib_name = lib_filename(package_name); - let lib_path = target_dir.join("release").join(lib_name); - assert!(lib_path.exists(), "missing built library at {:?}", lib_path); - lib_path -} - -fn lib_filename(package_name: &str) -> String { - let base = package_name.replace('-', "_"); - if cfg!(target_os = "linux") { - format!("lib{}.so", base) - } else if cfg!(target_os = "macos") { - format!("lib{}.dylib", base) - } else if cfg!(target_os = "windows") { - format!("{}.dll", base) - } else { - panic!("unsupported target for 
native hot reload tests"); - } -} - -fn module_ext() -> &'static str { - if cfg!(target_os = "windows") { - "dll" - } else if cfg!(target_os = "macos") { - "dylib" - } else { - "so" - } -} - -fn create_module_env() -> TempDir { - tempfile::tempdir().expect("Failed to create temp dir") -} - -fn write_file(dir: &TempDir, path: &str, content: &str) -> PathBuf { - let file_path = dir.path().join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).expect("Failed to create parent directories"); - } - fs::write(&file_path, content).expect("Failed to write file"); - file_path -} - -#[test] -fn hot_reload_requires_dev_flag() { - let lib_a = build_fixture("native_hot_a", "aelys-native-hot-a"); - let lib_b = build_fixture("native_hot_b", "aelys-native-hot-b"); - - let dir = create_module_env(); - let module_path = dir.path().join(format!("hot_mod.{}", module_ext())); - fs::copy(&lib_a, &module_path).expect("copy hot module A"); - - let entry_path = write_file(&dir, "main.aelys", "needs hot_mod\n"); - let source = Source::new(entry_path.display().to_string(), "needs hot_mod\n"); - - let config = VmConfig::default(); - let mut vm = - VM::with_config_and_args(source.clone(), config, Vec::new()).expect("vm should initialize"); - let mut loader = ModuleLoader::new(&entry_path, source); - - let needs = NeedsStmt { - path: vec!["hot_mod".to_string()], - kind: ImportKind::Module { alias: None }, - span: Span::dummy(), - }; - - loader - .load_module(&needs, &mut vm) - .expect("initial native module load should succeed"); - - replace_module_binary(&module_path, &lib_b); - - let err = loader - .load_module(&needs, &mut vm) - .err() - .expect("should fail when hot reload is disabled"); - assert!( - err.to_string().contains("hot reload disabled"), - "unexpected error: {}", - err - ); -} - -fn replace_module_binary(dest: &Path, new_lib: &Path) { - #[cfg(target_os = "windows")] - { - let parent = dest.parent().expect("module path should have parent"); - let stem 
= dest - .file_stem() - .and_then(|s| s.to_str()) - .unwrap_or("module"); - let ext = dest.extension().and_then(|s| s.to_str()).unwrap_or("dll"); - let unique = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_nanos(); - let instance = parent.join(format!("{}.{}.{}", stem, unique, ext)); - fs::copy(new_lib, &instance).expect("write hot module instance"); - } - #[cfg(not(target_os = "windows"))] - { - let temp_path = dest.with_extension(format!("{}.tmp", module_ext())); - fs::copy(new_lib, &temp_path).expect("stage hot module replacement"); - fs::rename(&temp_path, dest).expect("replace hot module with new binary"); - } -} diff --git a/aelys/tests/native_loader_tests.rs b/aelys/tests/native_loader_tests.rs deleted file mode 100644 index cf0f928..0000000 --- a/aelys/tests/native_loader_tests.rs +++ /dev/null @@ -1,43 +0,0 @@ -use aelys_modules::native::NativeLoader; -use std::path::{Path, PathBuf}; -use std::process::Command; - -fn build_test_cdylib() -> PathBuf { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let crate_dir = manifest_dir.join("tests/fixtures/native_test"); - let target_dir = manifest_dir.join("target/native_test"); - - let status = Command::new("cargo") - .arg("build") - .arg("--manifest-path") - .arg(crate_dir.join("Cargo.toml")) - .arg("--release") - .env("CARGO_TARGET_DIR", &target_dir) - .status() - .expect("cargo build should run"); - assert!(status.success(), "native test crate build failed"); - - let lib_name = if cfg!(target_os = "linux") { - "libaelys_native_test.so" - } else if cfg!(target_os = "macos") { - "libaelys_native_test.dylib" - } else if cfg!(target_os = "windows") { - "aelys_native_test.dll" - } else { - panic!("unsupported target for native loader test"); - }; - - let lib_path = target_dir.join("release").join(lib_name); - assert!(lib_path.exists(), "missing built library at {:?}", lib_path); - lib_path -} - -#[test] -fn load_native_descriptor_and_exports() { - 
let loader = NativeLoader::new(); - let lib_path = build_test_cdylib(); - let module = loader - .load_dynamic("native_test", &lib_path) - .expect("load native module"); - assert!(module.exports.contains_key("add")); -} diff --git a/aelys/tests/native_module_tests.rs b/aelys/tests/native_module_tests.rs deleted file mode 100644 index ca3d790..0000000 --- a/aelys/tests/native_module_tests.rs +++ /dev/null @@ -1,79 +0,0 @@ -use aelys_driver::run_file; -use std::fs::{self, File}; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::process::Command; -use tempfile::TempDir; - -fn build_test_cdylib() -> PathBuf { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let crate_dir = manifest_dir.join("tests/fixtures/native_test"); - let target_dir = manifest_dir.join("target/native_test"); - - let status = Command::new("cargo") - .arg("build") - .arg("--manifest-path") - .arg(crate_dir.join("Cargo.toml")) - .arg("--release") - .env("CARGO_TARGET_DIR", &target_dir) - .status() - .expect("cargo build should run"); - assert!(status.success(), "native test crate build failed"); - - let lib_name = if cfg!(target_os = "linux") { - "libaelys_native_test.so" - } else if cfg!(target_os = "macos") { - "libaelys_native_test.dylib" - } else if cfg!(target_os = "windows") { - "aelys_native_test.dll" - } else { - panic!("unsupported target for native module test"); - }; - - let lib_path = target_dir.join("release").join(lib_name); - assert!(lib_path.exists(), "missing built library at {:?}", lib_path); - lib_path -} - -fn create_module_env() -> TempDir { - tempfile::tempdir().expect("Failed to create temp dir") -} - -fn write_file(dir: &TempDir, path: &str, content: &str) -> PathBuf { - let file_path = dir.path().join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).expect("Failed to create parent directories"); - } - let mut file = File::create(&file_path).expect("Failed to create file"); - write!(file, "{}", content).expect("Failed 
to write file"); - file_path -} - -#[test] -fn script_imports_native_module() { - let dir = create_module_env(); - let lib_path = build_test_cdylib(); - - let ext = if cfg!(target_os = "windows") { - "dll" - } else if cfg!(target_os = "macos") { - "dylib" - } else { - "so" - }; - - let module_lib_path = dir.path().join(format!("native_test.{}", ext)); - fs::copy(&lib_path, &module_lib_path).expect("copy native module"); - - let main_path = write_file( - &dir, - "main.aelys", - r#" -needs native_test -native_test.add(5, 5) -"#, - ); - - let result = run_file(&main_path).expect("native module import should succeed"); - assert_eq!(result.as_int(), Some(10)); -} diff --git a/aelys/tests/native_tamper_tests.rs b/aelys/tests/native_tamper_tests.rs deleted file mode 100644 index a9b2947..0000000 --- a/aelys/tests/native_tamper_tests.rs +++ /dev/null @@ -1,98 +0,0 @@ -use aelys_driver::run_file; -use std::fs; -use std::path::{Path, PathBuf}; -use std::process::Command; -use tempfile::TempDir; - -fn build_fixture(dir_name: &str, package_name: &str) -> PathBuf { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let crate_dir = manifest_dir.join("tests/fixtures").join(dir_name); - let target_dir = manifest_dir.join("target").join(dir_name); - - let status = Command::new("cargo") - .arg("build") - .arg("--manifest-path") - .arg(crate_dir.join("Cargo.toml")) - .arg("--release") - .env("CARGO_TARGET_DIR", &target_dir) - .status() - .expect("cargo build should run"); - assert!(status.success(), "fixture build failed: {}", dir_name); - - let lib_name = lib_filename(package_name); - let lib_path = target_dir.join("release").join(lib_name); - assert!(lib_path.exists(), "missing built library at {:?}", lib_path); - lib_path -} - -fn lib_filename(package_name: &str) -> String { - let base = package_name.replace('-', "_"); - if cfg!(target_os = "linux") { - format!("lib{}.so", base) - } else if cfg!(target_os = "macos") { - format!("lib{}.dylib", base) - } else if 
cfg!(target_os = "windows") { - format!("{}.dll", base) - } else { - panic!("unsupported target for native tamper tests"); - } -} - -fn module_ext() -> &'static str { - if cfg!(target_os = "windows") { - "dll" - } else if cfg!(target_os = "macos") { - "dylib" - } else { - "so" - } -} - -fn create_module_env() -> TempDir { - tempfile::tempdir().expect("Failed to create temp dir") -} - -fn write_file(dir: &TempDir, path: &str, content: &str) -> PathBuf { - let file_path = dir.path().join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).expect("Failed to create parent directories"); - } - fs::write(&file_path, content).expect("Failed to write file"); - file_path -} - -#[test] -fn invalid_exports_hash_is_rejected() { - let lib = build_fixture("native_tamper", "aelys-native-tamper"); - let dir = create_module_env(); - - let dest = dir.path().join(format!("tamper.{}", module_ext())); - fs::copy(&lib, &dest).expect("copy native module"); - - let main_path = write_file(&dir, "main.aelys", "needs tamper\n"); - - let err = run_file(&main_path).expect_err("should fail"); - assert!( - err.to_string().contains("exports_hash"), - "unexpected error: {}", - err - ); -} - -#[test] -fn zero_exports_hash_is_rejected() { - let lib = build_fixture("native_zero_hash", "aelys-native-zero-hash"); - let dir = create_module_env(); - - let dest = dir.path().join(format!("zero_hash.{}", module_ext())); - fs::copy(&lib, &dest).expect("copy native module"); - - let main_path = write_file(&dir, "main.aelys", "needs zero_hash\n"); - - let err = run_file(&main_path).expect_err("should fail"); - assert!( - err.to_string().contains("exports_hash"), - "unexpected error: {}", - err - ); -} diff --git a/aelys/tests/object_tests.rs b/aelys/tests/object_tests.rs deleted file mode 100644 index 3eb23ce..0000000 --- a/aelys/tests/object_tests.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! 
Tests for Aelys VM objects - -use aelys_runtime::{ - AelysFunction, AelysString, Function, GcObject, GcRef, NativeFunction, ObjectKind, -}; - -#[test] -fn test_aelys_string_creation() { - let s = AelysString::new("hello"); - assert_eq!(s.as_str(), "hello"); - assert_eq!(s.len(), 5); - assert!(!s.is_empty()); -} - -#[test] -fn test_aelys_string_hash() { - let s1 = AelysString::new("test"); - let s2 = AelysString::new("test"); - let s3 = AelysString::new("different"); - - assert_eq!(s1.hash(), s2.hash()); - assert_ne!(s1.hash(), s3.hash()); -} - -#[test] -fn test_aelys_string_equality() { - let s1 = AelysString::new("equal"); - let s2 = AelysString::new("equal"); - let s3 = AelysString::new("not equal"); - - assert_eq!(s1, s2); - assert_ne!(s1, s3); -} - -#[test] -fn test_gc_ref() { - let ref1 = GcRef::new(42); - let ref2 = GcRef::from(42); - - assert_eq!(ref1, ref2); - assert_eq!(ref1.index(), 42); - assert_eq!(usize::from(ref1), 42); -} - -#[test] -fn test_gc_object() { - let obj = GcObject::new(ObjectKind::String(AelysString::new("test"))); - assert!(!obj.marked); - - match &obj.kind { - ObjectKind::String(s) => assert_eq!(s.as_str(), "test"), - _ => panic!("Expected String variant"), - } -} - -#[test] -fn test_aelys_function() { - let func = Function::new(Some("test_fn".to_string()), 2); - let aelys_func = AelysFunction::new(func); - - assert_eq!(aelys_func.name(), Some("test_fn")); - assert_eq!(aelys_func.arity(), 2); -} - -#[test] -fn test_native_function() { - let native = NativeFunction::new("test", 0); - assert_eq!(native.name, "test"); - assert_eq!(native.arity, 0); -} diff --git a/aelys/tests/optimizer_tests.rs b/aelys/tests/optimizer_tests.rs deleted file mode 100644 index 1444e09..0000000 --- a/aelys/tests/optimizer_tests.rs +++ /dev/null @@ -1,603 +0,0 @@ -use aelys::run_with_config_and_opt; -use aelys_opt::OptimizationLevel; -use aelys_runtime::{Value, VmConfig}; - -fn run_with_opt(code: &str, level: OptimizationLevel) -> Value { - 
run_with_config_and_opt(code, "", VmConfig::default(), Vec::new(), level) - .expect("Code should execute successfully") -} - -#[test] -fn optimizer_runs_on_typed_program() { - let src = aelys_syntax::Source::new("", "fn f() -> int { 1 + 2 }"); - let tokens = aelys_frontend::lexer::Lexer::with_source(src.clone()) - .scan() - .unwrap(); - let ast = aelys_frontend::parser::Parser::new(tokens, src.clone()) - .parse() - .unwrap(); - let typed = aelys_sema::TypeInference::infer_program(ast, src).unwrap(); - let mut opt = aelys_opt::Optimizer::new(aelys_opt::OptimizationLevel::Standard); - let _ = opt.optimize(typed); -} - -#[test] -fn test_constant_fold_int_arithmetic() { - // Test that constant folding produces correct results - let result = run_with_opt("2 + 3 * 4", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(14)); - - let result = run_with_opt("(10 - 5) / 2", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(2)); - - let result = run_with_opt("100 % 7", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_constant_fold_float_arithmetic() { - let result = run_with_opt("1.5 + 2.5", OptimizationLevel::Basic); - assert_eq!(result.as_float(), Some(4.0)); - - let result = run_with_opt("10.0 / 4.0", OptimizationLevel::Basic); - assert_eq!(result.as_float(), Some(2.5)); -} - -#[test] -fn test_constant_fold_comparisons() { - let result = run_with_opt("1 < 2", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_with_opt("5 > 10", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(false)); - - let result = run_with_opt("3 == 3", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_with_opt("4 != 4", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(false)); -} - -#[test] -fn test_constant_fold_string_concat() { - let code = r#" - let s = "Hello" + " " + "World" - s - "#; - let result = run_with_opt(code, 
OptimizationLevel::Basic); - // The result should be "Hello World" - assert!(result.is_ptr()); -} - -#[test] -fn test_constant_fold_unary() { - let result = run_with_opt("-(-5)", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(5)); - - // Aelys uses 'not' instead of '!' - let result = run_with_opt("not false", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(true)); - - let result = run_with_opt("not not true", OptimizationLevel::Basic); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_constant_fold_nested() { - // Deeply nested constant expressions - let result = run_with_opt( - "((1 + 2) * (3 + 4)) - ((5 - 2) * 2)", - OptimizationLevel::Basic, - ); - // (3 * 7) - (3 * 2) = 21 - 6 = 15 - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_no_fold_division_by_zero() { - // Division by zero should not be folded - we test that the optimizer - // doesn't crash when encountering this pattern - // The actual expression uses variables so it's not foldable anyway - let code = r#" - fn divide(a, b) { - a / b - } - divide(10, 2) - "#; - let result = run_with_opt(code, OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_no_fold_overflow() { - // Test that overflow protection works (uses checked arithmetic) - // Use smaller numbers that are within VM's int range - let code = "let x = 100000\nx + 200000"; - let result = run_with_opt(code, OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(300000)); -} - -#[test] -fn test_dce_after_return() { - // Code after return should be eliminated - let code = r#" - fn test() { - return 42 - let x = 100 // Dead code - x + 1 // Dead code - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_dce_constant_if_true() { - // if true should eliminate else branch - let code = r#" - let x = if true { 1 } else { 2 } - x - "#; - let result = 
run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn test_dce_constant_if_false() { - // if false should eliminate then branch - let code = r#" - let x = if false { 1 } else { 2 } - x - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_dce_while_false() { - // while false loop body is dead - let code = r#" - let mut x = 10 - while false { - x++ // Never executed - } - x - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(10)); -} - -#[test] -fn test_opt_level_none() { - // O0 should still produce correct results - let result = run_with_opt("2 + 3", OptimizationLevel::None); - assert_eq!(result.as_int(), Some(5)); -} - -#[test] -fn test_opt_level_basic() { - // O1 should do constant folding - let result = run_with_opt("2 + 3 * 4", OptimizationLevel::Basic); - assert_eq!(result.as_int(), Some(14)); -} - -#[test] -fn test_opt_level_standard() { - // O2 should do constant folding + DCE - let code = r#" - fn test() { - if false { return 0 } - 1 + 2 + 3 - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_opt_level_aggressive() { - // O3 should do multiple passes - let result = run_with_opt("((1 + 1) * (2 + 2)) / 2", OptimizationLevel::Aggressive); - // (2 * 4) / 2 = 4 - assert_eq!(result.as_int(), Some(4)); -} - -#[test] -fn test_optimization_preserves_semantics_simple() { - let code = "let x = 10\nlet y = 20\nx + y"; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - let result_o3 = run_with_opt(code, OptimizationLevel::Aggressive); - - assert_eq!(result_o0.as_int(), result_o2.as_int()); - assert_eq!(result_o2.as_int(), result_o3.as_int()); -} - -#[test] -fn test_optimization_preserves_semantics_function() { - 
let code = r#" - fn factorial(n) { - if n <= 1 { return 1 } - n * factorial(n - 1) - } - factorial(5) - "#; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - - assert_eq!(result_o0.as_int(), Some(120)); - assert_eq!(result_o2.as_int(), Some(120)); -} - -#[test] -fn test_optimization_preserves_semantics_loops() { - let code = r#" - let mut sum = 0 - for i in 1..5 { - sum += i - } - sum - "#; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - - // 1 + 2 + 3 + 4 = 10 - assert_eq!(result_o0.as_int(), Some(10)); - assert_eq!(result_o2.as_int(), Some(10)); -} - -#[test] -fn test_optimization_preserves_semantics_closures() { - // Aelys uses fn(y) {} for lambda expressions - let code = r#" - fn make_adder(x) { - return fn(y) { return x + y } - } - let add5 = make_adder(5) - add5(10) - "#; - - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o2 = run_with_opt(code, OptimizationLevel::Standard); - - assert_eq!(result_o0.as_int(), Some(15)); - assert_eq!(result_o2.as_int(), Some(15)); -} - -#[test] -fn test_register_reuse_chain() { - // Chain of operations should not use too many registers - let code = "1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10"; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(55)); -} - -#[test] -fn test_register_reuse_with_locals() { - let code = r#" - let a = 1 - let b = 2 - let c = 3 - a + b + c + a + b + c - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(12)); -} - -#[test] -fn test_empty_function() { - let code = r#" - fn empty() {} - empty() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert!(result.is_null()); -} - -#[test] -fn test_nested_if_constant() { - // Simple nested if test - let code = r#" - let x = if true { 10 } else { 20 
} - let y = if false { 30 } else { 40 } - x + y - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(50)); // 10 + 40 -} - -#[test] -fn test_mixed_types_in_expression() { - // Int + Float should promote to float - let result = run_with_opt("1 + 2.5", OptimizationLevel::Basic); - assert_eq!(result.as_float(), Some(3.5)); -} - -#[test] -fn test_global_const_prop_simple_literal() { - let code = r#" - let X: int = 42 - X + 10 - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(52)); -} - -#[test] -fn test_global_const_prop_simple_dependency() { - let code = r#" - let A: int = 10 - let B: int = A * 2 - B + 5 - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_global_const_prop_chained_dependency() { - let code = r#" - let A: int = 10 - let B: int = A * 2 - let C: int = B + 5 - C - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_global_const_prop_mutable_not_propagated() { - let code = r#" - let mut X: int = 42 - X++ - X - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(43)); -} - -#[test] -fn test_global_const_prop_complex_expression() { - let code = r#" - let WIDTH: float = 80.0 - let R1: float = 1.0 - let R2: float = 2.0 - let K2: float = 5.0 - let K1: float = WIDTH * K2 * 3.0 / (8.0 * (R1 + R2)) - K1 - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - // WIDTH * K2 * 3.0 / (8.0 * (R1 + R2)) - // = 80 * 5.0 * 3.0 / (8.0 * 3.0) - // = 1200.0 / 24.0 - // = 50.0 - assert_eq!(result.as_float(), Some(50.0)); -} - -#[test] -fn test_global_const_prop_float_globals() { - let code = r#" - let PI: float = 3.14159 - let RADIUS: float = 2.0 - let AREA: float = PI * RADIUS * RADIUS - AREA - "#; - let result = run_with_opt(code, 
OptimizationLevel::Standard); - let expected = std::f64::consts::PI * 2.0 * 2.0; - assert!((result.as_float().unwrap() - expected).abs() < 0.0001); -} - -#[test] -fn test_global_const_prop_in_function() { - let code = r#" - let MULTIPLIER: int = 10 - fn scale(x: int) -> int { - x * MULTIPLIER - } - scale(5) - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(50)); -} - -#[test] -fn test_global_const_prop_non_propagable_call() { - let code = r#" - fn get_value() -> int { 42 } - let X: int = get_value() - let Y: int = 10 - Y + 5 - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_global_const_prop_bool_constant() { - let code = r#" - let DEBUG: bool = true - let result: int = if DEBUG { 100 } else { 200 } - result - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(100)); -} - -#[test] -fn test_global_const_prop_string_constant() { - let code = r#" - let PREFIX: string = "Hello" - let SUFFIX: string = "World" - let MSG: string = PREFIX + " " + SUFFIX - MSG - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert!(result.is_ptr()); -} - -#[test] -fn test_fold_grouping_in_binary() { - // Test that (8.0 * (1.0 + 2.0)) folds completely - let code = "8.0 * (1.0 + 2.0)"; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_float(), Some(24.0)); -} - -#[test] -fn test_fold_nested_grouping_division() { - // Test that 1200.0 / (8.0 * (1.0 + 2.0)) folds completely - let code = "1200.0 / (8.0 * (1.0 + 2.0))"; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_float(), Some(50.0)); -} - -#[test] -fn test_local_const_prop_simple() { - let code = r#" - fn test() -> int { - let x = 5 - let y = 10 - x + y - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - 
assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_local_const_prop_chained() { - let code = r#" - fn test() -> int { - let a = 10 - let b = a * 2 - let c = b + 5 - c - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_local_const_prop_deep_chain() { - let code = r#" - fn test() -> int { - let x = 100 - let y = x * 2 - let z = y + 50 - z / 5 - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(50)); -} - -#[test] -fn test_local_const_prop_mutable_not_propagated() { - let code = r#" - fn test() -> int { - let mut x = 5 - x++ - x - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(6)); -} - -#[test] -fn test_local_const_prop_in_nested_if() { - // tests that outer constants are propagated into nested if-blocks - let code = r#" - fn test() -> int { - let base = 10 - let multiplier = 3 - if true { - base * multiplier - } else { - 0 - } - } - test() - "#; - // base * multiplier = 30 - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_local_const_prop_with_conditionals() { - let code = r#" - fn test(flag: bool) -> int { - let base = 10 - if flag { - let factor = 5 - base * factor - } else { - let offset = 3 - base + offset - } - } - test(true) + test(false) - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(63)); // 50 + 13 -} - -#[test] -fn test_local_const_prop_preserves_semantics() { - let code = r#" - fn calculate() -> int { - let a = 5 - let b = 10 - a + b * 2 - } - calculate() - "#; - let result_o0 = run_with_opt(code, OptimizationLevel::None); - let result_o3 = run_with_opt(code, OptimizationLevel::Aggressive); - assert_eq!(result_o0.as_int(), result_o3.as_int()); - assert_eq!(result_o3.as_int(), 
Some(25)); -} - -#[test] -fn test_local_const_prop_lambda() { - let code = r#" - fn test() -> int { - let multiplier = 3 - let f = fn(x: int) -> int { - let offset = 10 - x * multiplier + offset - } - f(5) - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - assert_eq!(result.as_int(), Some(25)); // 5 * 3 + 10 -} - -#[test] -fn test_local_const_prop_float() { - let code = r#" - fn test() -> float { - let pi = 3.14159 - let r = 2.0 - pi * r * r - } - test() - "#; - let result = run_with_opt(code, OptimizationLevel::Standard); - let expected = std::f64::consts::PI * 2.0 * 2.0; - assert!((result.as_float().unwrap() - expected).abs() < 0.0001); -} diff --git a/aelys/tests/pipeline_tests.rs b/aelys/tests/pipeline_tests.rs deleted file mode 100644 index 7f71cf2..0000000 --- a/aelys/tests/pipeline_tests.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Tests for the compilation pipeline. - -use aelys_driver::pipeline::{ - CompilerStage, LexerStage, ParserStage, Pipeline, TypeInferenceStage, VMStage, - standard_pipeline, -}; -use aelys_driver::run_source; - -#[test] -fn test_standard_pipeline_simple_arithmetic() { - let mut pipeline = standard_pipeline(); - let result = pipeline.execute_str("test", "1 + 2"); - assert!(result.is_ok()); - assert_eq!(result.unwrap().as_int(), Some(3)); -} - -#[test] -fn test_standard_pipeline_expression() { - let mut pipeline = standard_pipeline(); - let result = pipeline.execute_str("test", "(10 + 5) * 2"); - assert!(result.is_ok()); - assert_eq!(result.unwrap().as_int(), Some(30)); -} - -#[test] -fn test_standard_pipeline_let_binding() { - let mut pipeline = standard_pipeline(); - let result = pipeline.execute_str("test", "let x = 42\nx"); - assert!(result.is_ok()); - assert_eq!(result.unwrap().as_int(), Some(42)); -} - -#[test] -fn test_standard_pipeline_function() { - let mut pipeline = standard_pipeline(); - let result = pipeline.execute_str( - "test", - r#" - fn add(a, b) { - a + b - } - add(3, 4) - "#, - ); - 
assert!(result.is_ok()); - assert_eq!(result.unwrap().as_int(), Some(7)); -} - -#[test] -fn test_pipeline_caching() { - let mut pipeline = standard_pipeline(); - - // First execution - let result1 = pipeline.execute_str("test", "1 + 2"); - assert!(result1.is_ok()); - assert_eq!(result1.unwrap().as_int(), Some(3)); - - // Cache should have entries now (lexer, parser, type_inference, compiler) - // VM is not cached because it has side effects - assert!(pipeline.cache_size() > 0); - let cache_size_after_first = pipeline.cache_size(); - - // Second execution with same source - should use cache - let result2 = pipeline.execute_str("test", "1 + 2"); - assert!(result2.is_ok()); - assert_eq!(result2.unwrap().as_int(), Some(3)); - - // Cache size should be the same (no new entries) - assert_eq!(pipeline.cache_size(), cache_size_after_first); -} - -#[test] -fn test_pipeline_cache_invalidation() { - let mut pipeline = standard_pipeline(); - - // First execution - let result1 = pipeline.execute_str("test", "1 + 2"); - assert!(result1.is_ok()); - assert_eq!(result1.unwrap().as_int(), Some(3)); - let cache_size_first = pipeline.cache_size(); - - // Different source - new cache entries - let result2 = pipeline.execute_str("test", "10 + 20"); - assert!(result2.is_ok()); - assert_eq!(result2.unwrap().as_int(), Some(30)); - - // Cache should have more entries now - assert!(pipeline.cache_size() > cache_size_first); -} - -#[test] -fn test_pipeline_clear_cache() { - let mut pipeline = standard_pipeline(); - - let result = pipeline.execute_str("test", "1 + 2"); - assert!(result.is_ok()); - assert!(pipeline.cache_size() > 0); - - pipeline.clear_cache(); - assert_eq!(pipeline.cache_size(), 0); -} - -#[test] -fn test_custom_pipeline() { - let mut pipeline = Pipeline::new(); - pipeline.add_stage(Box::new(LexerStage)); - pipeline.add_stage(Box::new(ParserStage)); - pipeline.add_stage(Box::new(TypeInferenceStage::new())); - pipeline.add_stage(Box::new(CompilerStage::new())); - 
pipeline.add_stage(Box::new(VMStage::new())); - - let result = pipeline.execute_str("test", "5 * 5"); - assert!(result.is_ok()); - assert_eq!(result.unwrap().as_int(), Some(25)); -} - -#[test] -fn test_pipeline_syntax_error() { - let mut pipeline = standard_pipeline(); - let result = pipeline.execute_str("test", "1 +"); - assert!(result.is_err()); -} - -#[test] -fn test_pipeline_type_error() { - let mut pipeline = standard_pipeline(); - // This should cause a type error - undefined variable - let result = pipeline.execute_str("test", "undefined_var"); - assert!(result.is_err()); -} - -#[test] -fn test_driver_run_source() { - let result = run_source("1 + 2", "", None).expect("driver run should succeed"); - assert_eq!(result.as_int(), Some(3)); -} diff --git a/aelys/tests/regression_bugfix_tests.rs b/aelys/tests/regression_bugfix_tests.rs index 5f71582..17fa4c4 100644 --- a/aelys/tests/regression_bugfix_tests.rs +++ b/aelys/tests/regression_bugfix_tests.rs @@ -91,8 +91,8 @@ fn block_ids_coherent_in_while_loop() { let air = lower_source( r#" fn sum(n: i32) -> i32 { - let total: i32 = 0 - let i: i32 = 0 + let mut total: i32 = 0 + let mut i: i32 = 0 while i < n { total = total + i i = i + 1 @@ -143,9 +143,9 @@ fn block_ids_coherent_in_nested_control_flow() { let air = lower_source( r#" fn classify(n: i32) -> i32 { - let result: i32 = 0 + let mut result: i32 = 0 if n > 0 { - let i: i32 = 0 + let mut i: i32 = 0 while i < n { if i > 5 { result = result + 2 @@ -260,8 +260,8 @@ fn const_prop_does_not_substitute_in_while_condition() { let air = lower_optimized( r#" fn sum(n: i32) -> i32 { - let total: i32 = 0 - let i: i32 = 0 + let mut total: i32 = 0 + let mut i: i32 = 0 while i < n { total = total + i i = i + 1 @@ -364,7 +364,7 @@ fn apply(f: fn(i32) -> i32, x: i32) -> i32 { // bug fix: Cast from generic type parameter #[test] -fn generic_cast_does_not_error_at_sema() { +fn generic_cast_from_unconstrained_type_param_is_rejected() { let src = Source::new( "", r#" @@ -379,35 
+379,35 @@ fn to_float(x: T) -> f64 { .expect("parse failed"); let result = TypeInference::infer_program(stmts, src); assert!( - result.is_ok(), - "casting generic T to f64 should not produce a sema error, got: {:?}", + result.is_err(), + "casting unconstrained generic T to f64 should be rejected, got: {:?}", result.err() ); } #[test] -fn generic_cast_monomorphizes_correctly() { +fn generic_identity_still_monomorphizes() { let air = lower_source( r#" -fn to_float(x: T) -> f64 { - return x as f64 +fn id(x: T) -> T { + return x } -fn caller() -> f64 { +fn caller() -> i32 { let v: i32 = 42 - return to_float(v) + return id(v) } "#, ); let mut program = air; compute_layouts(&mut program); - let program = monomorphize(program); + let program = monomorphize(program).unwrap(); let mono_fn = program .functions .iter() - .find(|f| f.name.contains("__mono_to_float")); + .find(|f| f.name.contains("__mono_id")); assert!( mono_fn.is_some(), - "to_float should be monomorphized, found: {:?}", + "id should be monomorphized, found: {:?}", program .functions .iter() @@ -456,8 +456,8 @@ fn sum_loop_block_ids_valid() { let air = lower_source( r#" fn sum(n: i32) -> i64 { - let acc: i64 = 0 - let i: i32 = 0 + let mut acc: i64 = 0 + let mut i: i32 = 0 while i < n { acc = acc + i as i64 i = i + 1 diff --git a/aelys/tests/security/allocation_bomb.aelys b/aelys/tests/security/allocation_bomb.aelys deleted file mode 100644 index 8287658..0000000 --- a/aelys/tests/security/allocation_bomb.aelys +++ /dev/null @@ -1,23 +0,0 @@ -// check if we handle OOM (out of memory) error correctly -// crashes if everything is good - -needs std.io - -@no_gc -fn alloc_bomb() { - let mut i = 0 - while i < 10000000000 { - let p = alloc(4) - store(p, 0, i); - // io.print(p) - // no free on purpose - i = i + 1 - } -} - -fn main() { - alloc_bomb() -} - -main() - diff --git a/aelys/tests/security/gc_torture.aelys b/aelys/tests/security/gc_torture.aelys deleted file mode 100644 index e46993d..0000000 --- 
a/aelys/tests/security/gc_torture.aelys +++ /dev/null @@ -1,17 +0,0 @@ - needs std.io - needs std.string as string - needs to_string from std.convert - needs len from std.string - - fn gc_collect_ok() { - let mut i = 0 - let mut tmp = "seed" - while i < 2000000000 { - tmp = string.repeat("x", 2048 + (i % 64)) + to_string(i) - if i % 5000 == 0 { io.print(len(tmp)) } - i = i + 1 - } - } - - fn main() { gc_collect_ok() } - main() \ No newline at end of file diff --git a/aelys/tests/security/nan_boxing_torture.aelys b/aelys/tests/security/nan_boxing_torture.aelys deleted file mode 100644 index a9bc2a8..0000000 --- a/aelys/tests/security/nan_boxing_torture.aelys +++ /dev/null @@ -1,30 +0,0 @@ -// float -> pointer confusion -// crashes if everything is good - -needs std.io -needs std.convert - -@no_gc -fn nan_poison() { - let p = alloc(2) - store(p, 0, 42) - - // Create NaN - let z: float = 0.0 - let nan = z / z - - // Store NaN where an int/pointer is expected - store(p, 1, nan) - - // Reload and reinterpret - let x = load(p, 1) - io.print("Loaded value: " + convert.to_string(x)) - - free(p) -} - -fn main() { - nan_poison() -} - -main() diff --git a/aelys/tests/security/register_stress.aelys b/aelys/tests/security/register_stress.aelys deleted file mode 100644 index 0b99e48..0000000 --- a/aelys/tests/security/register_stress.aelys +++ /dev/null @@ -1,36 +0,0 @@ -// should run perfectly - -needs std.io -needs std.convert - -fn register_stress() -> int { - let a0 = 0 - let a1 = 1 - let a2 = 2 - let a3 = 3 - let a4 = 4 - let a5 = 5 - let a6 = 6 - let a7 = 7 - let a8 = 8 - let a9 = 9 - let a10 = 10 - let a11 = 11 - let a12 = 12 - let a13 = 13 - let a14 = 14 - let a15 = 15 - let a16 = 16 - let a17 = 17 - let a18 = 18 - let a19 = 19 - - return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + - a10 + a11 + a12 + a13 + a14 + a15 + a16 + a17 + a18 + a19 -} - -fn main() { - io.print(convert.to_string(register_stress())) -} - -main() diff --git 
a/aelys/tests/security/smash_oob.aelys b/aelys/tests/security/smash_oob.aelys deleted file mode 100644 index 70bff08..0000000 --- a/aelys/tests/security/smash_oob.aelys +++ /dev/null @@ -1,27 +0,0 @@ -// check if the runtime really prevents OOB writes -// crashes if everything is good - -@no_gc -fn smash_oob() { - let fb = alloc(4) - store(fb, 0, 123) - store(fb, 1, 456) - store(fb, 2, 789) - store(fb, 3, 999) - - // OOB writes - let mut i: int = 4 - while i < 40 { - store(fb, i, i * 1337) - i = i + 1 - } - - free(fb) -} - -fn main() { - smash_oob() - io.print("OOB test done") -} - -main() diff --git a/aelys/tests/security/stackoverflow.aelys b/aelys/tests/security/stackoverflow.aelys deleted file mode 100644 index c055e2a..0000000 --- a/aelys/tests/security/stackoverflow.aelys +++ /dev/null @@ -1,19 +0,0 @@ -// should handle stackoverflow -// crashes if everything is good - -needs std.io -needs std.convert - -fn recurse(n: int) -> int { - if n <= 0 { - return 0 - } - return recurse(n - 1) + 1 -} - -fn main() { - let r = recurse(100000) - io.print("Result = " + convert.to_string(r)) -} - -main() diff --git a/aelys/tests/security/use_after_free.aelys b/aelys/tests/security/use_after_free.aelys deleted file mode 100644 index ece1dc6..0000000 --- a/aelys/tests/security/use_after_free.aelys +++ /dev/null @@ -1,31 +0,0 @@ -// pointer already freed check -// crashes if everything is good - -needs std.convert - -@no_gc -fn use_after_free() { - let p = alloc(8) - store(p, 0, 111) - store(p, 1, 222) - - free(p) - - // Still writing after free - store(p, 0, 999) - store(p, 1, 888) - - // Allocate again - let q = alloc(8) - store(q, 0, 333) - store(q, 1, 444) - - io.print("p[0] = " + convert.to_string(load(p, 0))) - io.print("q[0] = " + convert.to_string(load(q, 0))) -} - -fn main() { - use_after_free() -} - -main() diff --git a/aelys/tests/security_audit_tests.rs b/aelys/tests/security_audit_tests.rs deleted file mode 100644 index 77ee871..0000000 --- 
a/aelys/tests/security_audit_tests.rs +++ /dev/null @@ -1,887 +0,0 @@ -use aelys_bytecode::asm::{BinaryError, deserialize}; -use aelys_common::{AelysError, CompileErrorKind, RuntimeErrorKind}; -use aelys_driver::run_file; -use aelys_runtime::{Function, GlobalLayout, OpCode, VM, Value}; -use aelys_syntax::Source; - -fn make_vm() -> VM { - VM::new(Source::new("test.aelys", "")).unwrap() -} - -fn run_function(vm: &mut VM, mut func: Function) -> Result { - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func)?; - vm.execute(func_ref) -} - -#[test] -fn verify_rejects_register_oob() { - let mut vm = make_vm(); - let mut func = Function::new(Some("oob".to_string()), 0); - func.num_registers = 1; - func.emit_a(OpCode::Move, 1, 0, 0, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.num_registers = 1; - let func_ref = vm.alloc_function(func).unwrap(); - let err = vm.execute(func_ref).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("register")), - _ => panic!("expected InvalidBytecode for register OOB"), - } -} - -#[test] -fn verify_rejects_constant_oob() { - let mut vm = make_vm(); - let mut func = Function::new(Some("const_oob".to_string()), 0); - func.num_registers = 1; - func.emit_b(OpCode::LoadK, 0, 1, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.num_registers = 1; - let func_ref = vm.alloc_function(func).unwrap(); - let err = vm.execute(func_ref).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("constant")), - _ => panic!("expected InvalidBytecode for constant OOB"), - } -} - -#[test] -fn manual_heap_invalid_handle_is_rejected() { - let mut vm = make_vm(); - let mut func = Function::new(Some("heap_bad_handle".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 123, 1); - func.emit_b(OpCode::LoadI, 1, 0, 1); - func.emit_a(OpCode::LoadMem, 2, 0, 1, 1); - 
func.emit_a(OpCode::Return, 2, 0, 0, 1); - - let err = run_function(&mut vm, func).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidMemoryHandle => {} - _ => panic!("expected InvalidMemoryHandle for invalid manual heap handle"), - } -} - -#[test] -fn verifier_blocks_gc_untracked_registers() { - let mut vm = make_vm(); - let str_ref = vm.alloc_string("secret").unwrap(); - - let mut func = Function::new(Some("gc_oob".to_string()), 0); - func.num_registers = 1; - func.constants.push(Value::ptr(str_ref.index())); - func.emit_b(OpCode::LoadK, 1, 0, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.num_registers = 1; - let func_ref = vm.alloc_function(func).unwrap(); - let err = vm.execute(func_ref).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("register")), - _ => panic!("expected InvalidBytecode for GC register OOB"), - } -} - -#[test] -fn stdlib_exec_denied_without_capability() { - let src = r#" -needs exec from std.sys -exec("echo hi") -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("cap.aelys"); - std::fs::write(&path, src).unwrap(); - let err = run_file(&path).unwrap_err(); - match err { - AelysError::Runtime(runtime) => match runtime.kind { - RuntimeErrorKind::CapabilityDenied { operation } => { - assert_eq!(operation, "sys.exec"); - } - _ => panic!("expected CapabilityDenied for sys.exec"), - }, - _ => panic!("expected runtime error for sys.exec"), - } -} - -#[test] -fn verify_rejects_invalid_opcode() { - let mut vm = make_vm(); - let mut func = Function::new(Some("bad_opcode".to_string()), 0); - func.num_registers = 0; - func.set_bytecode(vec![0xFF00_0000]); - - let err = run_function(&mut vm, func).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("invalid opcode")), - _ => panic!("expected InvalidBytecode for invalid opcode"), - } -} - -#[test] -fn 
binary_deserialize_enforces_bytecode_limit() { - let mut bytes = Vec::new(); - bytes.extend_from_slice(b"VBXQ"); - bytes.extend_from_slice(&1u16.to_le_bytes()); - bytes.extend_from_slice(&0u16.to_le_bytes()); - bytes.extend_from_slice(&1u32.to_le_bytes()); - bytes.extend_from_slice(&0u32.to_le_bytes()); - bytes.extend_from_slice(&0u16.to_le_bytes()); // name len - bytes.push(0u8); // arity - bytes.push(0u8); // num_registers - bytes.extend_from_slice(&0u16.to_le_bytes()); // constants - bytes.extend_from_slice(&(1_000_001u32).to_le_bytes()); // bytecode length - - match deserialize(&bytes) { - Err(BinaryError::LimitExceeded { what, .. }) => { - assert_eq!(what, "bytecode length"); - } - Err(other) => panic!("expected LimitExceeded, got {:?}", other), - Ok(_) => panic!("expected failure for oversized bytecode"), - } -} - -#[test] -fn no_gc_underflow_is_rejected() { - let mut vm = make_vm(); - let mut func = Function::new(Some("no_gc_underflow".to_string()), 0); - func.num_registers = 0; - func.emit_a(OpCode::ExitNoGc, 0, 0, 0, 1); - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - let err = run_function(&mut vm, func).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("no_gc underflow")), - _ => panic!("expected InvalidBytecode for no_gc underflow"), - } -} - -#[test] -fn nan_is_not_treated_as_pointer() { - let value = Value::float(f64::NAN); - assert!(value.is_float()); - assert!(value.as_float().unwrap().is_nan()); - assert!(value.as_ptr().is_none()); -} - -#[test] -fn read_register_without_frame_is_error() { - let vm = make_vm(); - let err = vm.read_register(0).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => assert!(msg.contains("no call frame")), - _ => panic!("expected InvalidBytecode for missing frame"), - } -} - -#[cfg(unix)] -#[test] -fn module_loader_rejects_symlink_escape() { - use aelys_common::CompileErrorKind; - use aelys_driver::modules::ModuleLoader; - use std::fs; - use 
std::os::unix::fs::symlink; - use tempfile::tempdir; - - let dir = tempdir().unwrap(); - let base = dir.path().join("root"); - let outside = dir.path().join("outside"); - fs::create_dir_all(&base).unwrap(); - fs::create_dir_all(&outside).unwrap(); - - let entry = base.join("main.aelys"); - fs::write(&entry, "needs evil.secret").unwrap(); - - let secret = outside.join("secret.aelys"); - fs::write(&secret, "pub let X = 1").unwrap(); - - let link = base.join("evil"); - symlink(&outside, &link).unwrap(); - - let source = Source::new(entry.to_str().unwrap(), ""); - let loader = ModuleLoader::new(&entry, source); - let err = loader - .resolve_path(&["evil".to_string(), "secret".to_string()]) - .unwrap_err(); - - match err { - AelysError::Compile(err) => match err.kind { - CompileErrorKind::ModuleNotFound { .. } => {} - _ => panic!("expected ModuleNotFound for symlink escape"), - }, - _ => panic!("expected compile error for symlink escape"), - } -} - -#[test] -fn global_mapping_id_ignores_layout_hash_collisions() { - let mut vm = make_vm(); - let mut func_a = Function::new(Some("a".to_string()), 0); - func_a.global_layout = GlobalLayout::new(vec!["first".to_string()]); - func_a.global_layout_hash = 1; - let func_a_ref = vm.alloc_function(func_a).unwrap(); - - let mut func_b = Function::new(Some("b".to_string()), 0); - func_b.global_layout = GlobalLayout::new(vec!["second".to_string()]); - func_b.global_layout_hash = 1; - let func_b_ref = vm.alloc_function(func_b).unwrap(); - - let id_a = vm.get_global_mapping_id(func_a_ref); - let id_b = vm.get_global_mapping_id(func_b_ref); - assert_ne!(id_a, 0); - assert_ne!(id_b, 0); - assert_ne!(id_a, id_b); -} - -#[test] -#[ignore] -fn parser_rejects_deep_expression_recursion() { - let result = std::thread::Builder::new() - .stack_size(32 * 1024 * 1024) // 32MB stack to handle deep parsing - .spawn(|| { - let depth = 1010; - let mut code = "(".repeat(depth); - code.push('1'); - code.push_str(&")".repeat(depth)); - - let dir = 
tempfile::tempdir().unwrap(); - let path = dir.path().join("recursion_test.aelys"); - std::fs::write(&path, &code).unwrap(); - - run_file(&path) - }) - .unwrap() - .join() - .unwrap(); - - match result { - Err(AelysError::Compile(e)) => { - assert!( - matches!(e.kind, CompileErrorKind::RecursionDepthExceeded { .. }), - "expected RecursionDepthExceeded, got {:?}", - e.kind - ); - } - Err(e) => panic!("expected RecursionDepthExceeded compile error, got {:?}", e), - Ok(_) => panic!("expected compile error for deep recursion"), - } -} -#[test] -fn lexer_rejects_deep_comment_nesting() { - let depth = 300; - let mut code = "/* ".repeat(depth); - code.push_str(&" */".repeat(depth)); - - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("comment_test.aelys"); - std::fs::write(&path, &code).unwrap(); - - let err = run_file(&path).unwrap_err(); - match err { - AelysError::Compile(e) => { - assert!( - matches!(e.kind, CompileErrorKind::CommentNestingTooDeep { .. }), - "expected CommentNestingTooDeep, got {:?}", - e.kind - ); - } - _ => panic!("expected compile error for deep comment nesting"), - } -} - -#[test] -fn fs_join_rejects_absolute_path() { - let src = r#" -needs std.fs -fs.join("/app", "/etc/passwd") -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("path_traversal.aelys"); - std::fs::write(&path, src).unwrap(); - - let mut config = aelys_runtime::VmConfig::default(); - config.capabilities.allow_fs = true; - - let err = aelys_driver::run_file_with_config(&path, config, Vec::new()).unwrap_err(); - match err { - AelysError::Runtime(runtime) => { - let msg = format!("{:?}", runtime.kind); - assert!( - msg.contains("absolute") || msg.contains("path"), - "expected path traversal error, got: {}", - msg - ); - } - _ => panic!("expected runtime error for path traversal"), - } -} - -#[test] -fn fs_join_rejects_parent_escape() { - let src = r#" -needs std.fs -fs.join("/app/data", "../../../etc/passwd") -"#; - let dir = 
tempfile::tempdir().unwrap(); - let path = dir.path().join("path_escape.aelys"); - std::fs::write(&path, src).unwrap(); - - let mut config = aelys_runtime::VmConfig::default(); - config.capabilities.allow_fs = true; - - let err = aelys_driver::run_file_with_config(&path, config, Vec::new()).unwrap_err(); - match err { - AelysError::Runtime(runtime) => { - let msg = format!("{:?}", runtime.kind); - assert!( - msg.contains("escapes") || msg.contains("base"), - "expected path escape error, got: {}", - msg - ); - } - _ => panic!("expected runtime error for path escape"), - } -} - -#[test] -fn fs_read_bytes_rejects_huge_buffer() { - let dir = tempfile::tempdir().unwrap(); - let test_file = dir.path().join("test.bin"); - std::fs::write(&test_file, "test content").unwrap(); - - let src = format!( - r#" -needs std.fs -let f = fs.open("{}", "r") -fs.read_bytes(f, 999999999999) -"#, - test_file.display().to_string().replace('\\', "/") - ); - - let path = dir.path().join("huge_buffer.aelys"); - std::fs::write(&path, &src).unwrap(); - - let mut config = aelys_runtime::VmConfig::default(); - config.capabilities.allow_fs = true; - - let result = aelys_driver::run_file_with_config(&path, config, Vec::new()); - match result { - Err(AelysError::Runtime(runtime)) => { - let msg = format!("{:?}", runtime.kind); - assert!( - msg.contains("buffer") - || msg.contains("maximum") - || msg.contains("size") - || msg.contains("max"), - "expected buffer size error, got: {}", - msg - ); - } - Err(e) => panic!("expected runtime error for huge buffer, got: {:?}", e), - Ok(_) => panic!("expected error for huge buffer allocation"), - } -} - -/* - verifies that ForLoopI/WhileLoopLt correctly validate consecutive register ranges - previously, `a + 1` and `a + 2` were computed without overflow protection -*/ -#[test] -fn register_index_overflow_is_handled() { - let mut vm = make_vm(); - - // ForLoopI uses 3 consecutive registers: a, a+1, a+2 - // with num_registers=5 and a=3, registers 3,4,5 would be 
needed but only 0-4 exist - let mut func = Function::new(Some("forloop_oob".to_string()), 0); - // ForLoopI with a=3 needs r3, r4, r5, but we'll limit to 5 registers (0-4) - // emit_b format: (opcode, reg_a, imm16, line) - func.emit_b(OpCode::ForLoopI, 3, -1, 1); // jump offset doesn't matter for this test - func.emit_a(OpCode::Return0, 0, 0, 0, 1); - - func.finalize_bytecode(); - // override num_registers after finalize to force the oob condition - func.num_registers = 5; // registers 0-4 available, but ForLoopI needs 3,4,5 - let func_ref = vm.alloc_function(func).unwrap(); - let err = vm.execute(func_ref).unwrap_err(); - match err.kind { - RuntimeErrorKind::InvalidBytecode(msg) => { - assert!( - msg.contains("register") || msg.contains("ForLoopI"), - "expected register bounds error, got: {}", - msg - ); - } - _ => panic!( - "expected InvalidBytecode for ForLoopI register OOB, got {:?}", - err.kind - ), - } -} - -#[test] -fn bytecode_rejects_invalid_nested_func_idx() { - let mut bytes = Vec::new(); - bytes.extend_from_slice(b"VBXQ"); // Magic - bytes.extend_from_slice(&1u16.to_le_bytes()); // Version major - bytes.extend_from_slice(&0u16.to_le_bytes()); // Version minor - bytes.extend_from_slice(&1u32.to_le_bytes()); // Flags - bytes.extend_from_slice(&0u32.to_le_bytes()); // Entry point - - // Function 0: - bytes.extend_from_slice(&0u16.to_le_bytes()); // Name length - bytes.push(0u8); // Arity - bytes.push(1u8); // Num registers - bytes.extend_from_slice(&1u16.to_le_bytes()); // 1 constant - - // Constant: TAG_FUNC with invalid index - bytes.push(5u8); // TAG_FUNC - bytes.extend_from_slice(&999u32.to_le_bytes()); // func_idx = 999 (but 0 nested functions) - - bytes.extend_from_slice(&1u32.to_le_bytes()); // Bytecode length - bytes.extend_from_slice(&0u32.to_le_bytes()); // Return0 opcode - - bytes.extend_from_slice(&0u16.to_le_bytes()); // Upvalue descriptors - bytes.extend_from_slice(&0u16.to_le_bytes()); // Line numbers - 
bytes.extend_from_slice(&0u32.to_le_bytes()); // Nested functions = 0 - bytes.extend_from_slice(&0u16.to_le_bytes()); // Global layout - - match deserialize(&bytes) { - Err(BinaryError::InvalidNestedFunctionIndex { index, max }) => { - assert_eq!(index, 999); - assert!(max < 999); - } - Err(_) => {} - Ok(_) => panic!("expected failure for invalid nested func_idx"), - } -} - -#[test] -fn gc_iterative_marking_handles_deep_closures() { - let src = r#" -fn make_chain(n) { - if n <= 0 { - return fn() { return 0 } - } - let inner = make_chain(n - 1) - return fn() { return inner() + 1 } -} -let chain = make_chain(50) -chain() -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("deep_closures.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(50)); -} - -#[test] -fn call_site_cache_slot_limit_enforced() { - use aelys_runtime::MAX_CALL_SITE_SLOTS; - const { assert!(MAX_CALL_SITE_SLOTS > 0) }; - const { assert!(MAX_CALL_SITE_SLOTS <= 65535) }; -} - -#[test] -fn integer_48bit_bounds_checked() { - use aelys_runtime::Value; - let max_val = Value::int(Value::INT_MAX); - assert_eq!(max_val.as_int(), Some(Value::INT_MAX)); - let min_val = Value::int(Value::INT_MIN); - assert_eq!(min_val.as_int(), Some(Value::INT_MIN)); - assert!(Value::int_checked(Value::INT_MAX + 1).is_err()); - assert!(Value::int_checked(Value::INT_MIN - 1).is_err()); -} - -#[test] -fn module_import_non_existent_symbol_fails() { - let dir = tempfile::tempdir().unwrap(); - let mod_path = dir.path().join("mymod.aelys"); - std::fs::write(&mod_path, "pub let x = 1").unwrap(); - let main_path = dir.path().join("main.aelys"); - std::fs::write(&main_path, "needs nonexistent from mymod\nnonexistent").unwrap(); - let err = run_file(&main_path).unwrap_err(); - match err { - AelysError::Compile(e) => { - let msg = format!("{:?}", e.kind); - assert!( - msg.contains("not found") || msg.contains("Symbol") || 
msg.contains("nonexistent"), - "expected symbol not found error, got: {}", - msg - ); - } - _ => panic!("expected compile error for non-existent symbol import"), - } -} - -#[test] -fn closure_captures_preserve_values() { - let src = r#" -fn make_adder(n) { - return fn(x) { return n + x } -} -let add5 = make_adder(5) -let add10 = make_adder(10) -add5(3) + add10(7) -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("closures.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn deeply_nested_functions_work() { - let src = r#" -fn l1(a) { - fn l2(b) { - fn l3(c) { - fn l4(d) { - fn l5(e) { - return a + b + c + d + e - } - return l5 - } - return l4 - } - return l3 - } - return l2 -} -l1(1)(2)(3)(4)(5) -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("nested.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn manual_heap_operations_validated() { - let src = r#" - let h = alloc(5) - store(h, 0, 100) - store(h, 4, 400) - let a = load(h, 0) - let b = load(h, 4) - free(h) - a + b - "#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("heap.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(500)); -} - -#[test] -fn recursive_function_handles_deep_calls() { - let src = r#" -fn sum_to(n) { - if n <= 0 { - return 0 - } - return n + sum_to(n - 1) -} -sum_to(100) -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("recursive.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(5050)); -} - -#[test] -fn inline_cache_works_correctly() { - let src = r#" -fn double(x) { - return x * 2 -} -let mut sum = 0 -let mut i = 0 -while i < 1000 { - sum += double(i) - 
i++ -} -sum -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("cache.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - let expected: i64 = (0..1000).map(|x| x * 2).sum(); - assert_eq!(result.as_int(), Some(expected)); -} - -#[test] -fn compiler_rc_shared_state() { - use aelys_backend::Compiler; - use aelys_syntax::Source; - use std::rc::Rc; - let source = Source::new("", "let x = 1"); - let compiler = Compiler::new(None, source); - let count1 = Rc::strong_count(&compiler.module_aliases); - let count2 = Rc::strong_count(&compiler.known_globals); - let count3 = Rc::strong_count(&compiler.known_native_globals); - assert!(count1 >= 1); - assert!(count2 >= 1); - assert!(count3 >= 1); - let _alias_clone = Rc::clone(&compiler.module_aliases); - assert!(Rc::strong_count(&compiler.module_aliases) >= 2); -} - -#[test] -fn type_inference_rc_shared() { - use aelys_sema::{InferType, TypeEnv}; - use std::rc::Rc; - let mut env = TypeEnv::new(); - let fn_type = Rc::new(InferType::Function { - params: vec![InferType::I64], - ret: Box::new(InferType::I64), - }); - env.define_function("test_fn".to_string(), Rc::clone(&fn_type)); - assert_eq!(Rc::strong_count(&fn_type), 2); - let looked_up = env.lookup_function("test_fn"); - assert!(looked_up.is_some()); -} - -#[test] -fn value_nan_boxing_preserves_types() { - use aelys_runtime::Value; - let int_val = Value::int(42); - assert!(int_val.is_int()); - assert!(!int_val.is_float()); - assert!(!int_val.is_bool()); - assert!(!int_val.is_null()); - let float_val = Value::float(2.72); - assert!(float_val.is_float()); - assert!(!float_val.is_int()); - let bool_val = Value::bool(true); - assert!(bool_val.is_bool()); - let null_val = Value::null(); - assert!(null_val.is_null()); - let ptr_val = Value::ptr(12345); - assert!(ptr_val.is_ptr()); -} - -#[test] -fn arithmetic_operations_work() { - let src = r#" - let a = 10 + 5 - let b = 10 - 5 - let c = 10 * 5 - let d = 10 / 5 - let e = 
10 % 3 - a + b + c + d + e - "#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("arith.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(15 + 5 + 50 + 2 + 1)); -} - -#[test] -fn comparison_operations_work() { - let src = r#" -let a = 5 < 10 -let b = 10 > 5 -let c = 5 <= 5 -let d = 5 >= 5 -let e = 5 == 5 -let f = 5 != 6 -let result = if a and b and c and d and e and f { 1 } else { 0 } -result -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("cmp.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn logical_short_circuit_and() { - let src = r#" -let mut called = false -fn side_effect() { - called = true - return true -} -let result = false and side_effect() -let out = if called { 1 } else { 0 } -out -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("short_and.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn logical_short_circuit_or() { - let src = r#" -let mut called = false -fn side_effect() { - called = true - return true -} -let result = true or side_effect() -let out = if called { 1 } else { 0 } -out -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("short_or.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(0)); -} - -#[test] -fn while_loop_with_break() { - let src = r#" -let mut i = 0 -while true { - i++ - if i >= 50 { - break - } -} -i -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("break.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(50)); -} - -#[test] -fn while_loop_with_continue() { - let src = r#" -let mut i = 0 -let 
mut sum = 0 -while i < 20 { - i++ - if i % 2 == 0 { - continue - } - sum += i -} -sum -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("continue.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - let expected: i64 = (1..=20).filter(|x| x % 2 != 0).sum(); - assert_eq!(result.as_int(), Some(expected)); -} - -#[test] -fn string_concatenation() { - let src = r#" -let s = "hello" + " " + "world" -s.len() -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("strings.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(11)); -} - -#[test] -fn mutual_recursion_works() { - let src = r#" -fn is_even(n) { - if n == 0 { - return true - } - return is_odd(n - 1) -} -fn is_odd(n) { - if n == 0 { - return false - } - return is_even(n - 1) -} -let result = if is_even(50) and is_odd(51) { 1 } else { 0 } -result -"#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("mutual.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn variable_shadowing() { - let src = r#" - let x = 1 - { - let x = 2 - { - let x = 3 - } - } - x - "#; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("shadow.aelys"); - std::fs::write(&path, src).unwrap(); - let result = run_file(&path).unwrap(); - assert_eq!(result.as_int(), Some(1)); -} - -#[test] -fn globals_sync_includes_null_values() { - /* verifies that setting a global to null via one module is visible in another - previously, null values were skipped during sync causing inconsistencies. 
*/ - let dir = tempfile::tempdir().unwrap(); - - let helper = dir.path().join("helper.aelys"); - std::fs::write( - &helper, - r#" -pub let mut shared = 42 - -pub fn set_to_null() { - shared = null -} - -pub fn get_shared() { - return shared -} -"#, - ) - .unwrap(); - - let main = dir.path().join("main.aelys"); - std::fs::write( - &main, - r#" -needs shared, set_to_null, get_shared from helper - -let before = get_shared() -set_to_null() -let after = get_shared() - -if before == 42 and after == null { 1 } else { 0 } -"#, - ) - .unwrap(); - - let result = run_file(&main).unwrap(); - assert_eq!(result.as_int(), Some(1)); -} diff --git a/aelys/tests/sema_adversarial_tests.rs b/aelys/tests/sema_adversarial_tests.rs new file mode 100644 index 0000000..3392dea --- /dev/null +++ b/aelys/tests/sema_adversarial_tests.rs @@ -0,0 +1,659 @@ +//! these are designed to break the Aelys type checker lol +//! each test exercises an edge case or missing validation that could cause miscompilation or unsoundness +//! you get the point. 
+ +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn should_fail(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_err() +} + +fn should_pass(code: &str) { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => {} + Err(errors) => { + for e in &errors { + eprintln!(" ERROR: {}", e); + } + panic!("expected OK, got {} errors", errors.len()); + } + } +} + +#[test] +fn duplicate_struct_field_names_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64, x: string } +fn test() {} +"# + ), + "duplicate struct field names should be rejected" + ); +} + +#[test] +fn duplicate_struct_field_same_type_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64, x: i64 } +fn test() {} +"# + ), + "duplicate struct fields with same type should still be rejected" + ); +} + +#[test] +fn three_duplicate_fields_rejected() { + assert!( + should_fail( + r#" +struct Bar { a: i64, b: string, a: bool } +fn test() {} +"# + ), + "struct with duplicate field 'a' should be rejected" + ); +} + +#[test] +fn struct_literal_duplicate_field_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64 } +fn test() { + let f = Foo { x: 1, x: 2 } +} +"# + ), + "struct literal with duplicate field should be rejected" + ); +} + +#[test] +fn struct_literal_duplicate_field_different_types_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64, y: string } +fn test() { + let f = Foo { x: 1, y: "hi", x: 2 } +} +"# + ), + "struct literal with duplicate field (alongside valid fields) 
should be rejected" + ); +} + +#[test] +fn immutable_variable_reassignment_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 5 + x = 10 +} +"# + ), + "reassigning an immutable variable should be rejected" + ); +} + +#[test] +fn mutable_variable_reassignment_accepted() { + should_pass( + r#" +fn test() { + let mut x = 5 + x = 10 +} +"#, + ); +} + +#[test] +fn immutable_param_reassignment_rejected() { + assert!( + should_fail( + r#" +fn test(x: i64) { + x = 10 +} +"# + ), + "reassigning an immutable parameter should be rejected" + ); +} + +#[test] +fn mutable_param_reassignment_accepted() { + should_pass( + r#" +fn test(mut x: i64) { + x = 10 +} +"#, + ); +} + +#[test] +fn duplicate_function_definitions_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { return 1 } +fn foo() -> string { return "hello" } +fn test() {} +"# + ), + "duplicate function definitions should be rejected" + ); +} + +#[test] +fn duplicate_function_same_signature_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { return 1 } +fn foo() -> i64 { return 2 } +fn test() {} +"# + ), + "duplicate function definitions with same signature should be rejected" + ); +} + +#[test] +fn duplicate_parameter_names_rejected() { + assert!( + should_fail( + r#" +fn foo(x: i64, x: string) {} +fn test() {} +"# + ), + "duplicate parameter names should be rejected" + ); +} + +#[test] +fn error_recovery_doesnt_mask_subsequent_errors() { + // if the first error poisons a type variable via force_dynamic, a second error on a different variable sharing the same Var should still be caught + assert!( + should_fail( + r#" +fn test() { + let x: i64 = "bad" + let y: string = 42 +} +"# + ), + "both type errors should be caught, not just the first" + ); +} + +#[test] +fn error_recovery_multiple_return_mismatches() { + assert!( + should_fail( + r#" +fn test() -> i64 { + if true { + return "nope" + } + return "also nope" +} +"# + ), + "multiple return type mismatches should all be caught" + 
); +} + +#[test] +fn recursive_function_wrong_return_type() { + assert!( + should_fail( + r#" +fn fib(n: i64) -> string { + if n <= 1 { + return n + } + return fib(n - 1) + fib(n - 2) +} +fn test() {} +"# + ), + "returning i64 from function declared to return string should fail" + ); +} + +#[test] +fn empty_array_with_wrong_type_annotation() { + assert!( + should_fail( + r#" +fn test() { + let x: [i64; 1] = ["hello"] +} +"# + ), + "string array assigned to [i64; 1] should fail" + ); +} + +#[test] +fn array_element_type_mismatch_in_function_call() { + assert!( + should_fail( + r#" +fn sum(arr: [i64; 2]) -> i64 { return arr[0] } +fn test() { + let x = sum(["hello", "world"]) +} +"# + ), + "passing [string; 2] to [i64; 2] parameter should fail" + ); +} + +#[test] +fn for_loop_variable_not_visible_outside() { + assert!( + should_fail( + r#" +fn test() -> i64 { + for i in 0..10 {} + return i +} +"# + ), + "for loop variable should not be visible outside the loop" + ); +} + +#[test] +fn while_body_variable_not_visible_outside() { + assert!( + should_fail( + r#" +fn test() -> i64 { + while true { + let x = 42 + } + return x +} +"# + ), + "variable defined in while body should not be visible outside" + ); +} + +#[test] +fn if_expr_different_branch_types_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = if true { 42 } else { "hello" } +} +"# + ), + "if expression with different branch types should be rejected" + ); +} + +#[test] +fn too_many_arguments_rejected() { + assert!( + should_fail( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } +fn test() { + let x = add(1, 2, 3) +} +"# + ), + "calling function with too many arguments should fail" + ); +} + +#[test] +fn too_few_arguments_rejected() { + assert!( + should_fail( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } +fn test() { + let x = add(1) +} +"# + ), + "calling function with too few arguments should fail" + ); +} + +#[test] +fn string_minus_rejected() { + assert!( + should_fail( + r#" +fn 
test() { + let x = "hello" - "world" +} +"# + ), + "string subtraction should be rejected" + ); +} + +#[test] +fn bool_arithmetic_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = true + false +} +"# + ), + "boolean arithmetic should be rejected" + ); +} + +#[test] +fn struct_arithmetic_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64 } +fn test() { + let a = Foo { x: 1 } + let b = Foo { x: 2 } + let c = a + b +} +"# + ), + "struct arithmetic should be rejected" + ); +} + +#[test] +fn comparison_lt_different_types_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 42 < "hello" +} +"# + ), + "comparing i64 < string should be rejected" + ); +} + +#[test] +fn comparison_bool_lt_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = true < false +} +"# + ), + "boolean comparison with < should be rejected (not numeric)" + ); +} + +#[test] +fn index_on_bool_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = true + let y = x[0] +} +"# + ), + "indexing a bool should be rejected" + ); +} + +#[test] +fn index_on_integer_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 42 + let y = x[0] +} +"# + ), + "indexing an integer should be rejected" + ); +} + +#[test] +fn member_access_on_integer_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 42 + let y = x.foo +} +"# + ), + "member access on integer should be rejected" + ); +} + +#[test] +fn unknown_struct_field_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64 } +fn test() { + let f = Foo { x: 1 } + let y = f.nonexistent +} +"# + ), + "accessing nonexistent field should be rejected" + ); +} + +#[test] +fn string_to_int_cast_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = "hello" as i64 +} +"# + ), + "casting string to i64 should be rejected" + ); +} + +#[test] +fn struct_to_int_cast_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: i64 } +fn test() { + let f = Foo { 
x: 1 } + let y = f as i64 +} +"# + ), + "casting struct to i64 should be rejected" + ); +} + +#[test] +fn bitwise_on_float_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 1.5 & 2.5 +} +"# + ), + "bitwise AND on floats should be rejected" + ); +} + +#[test] +fn bitwise_on_string_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = "a" | "b" +} +"# + ), + "bitwise OR on strings should be rejected" + ); +} + +#[test] +fn logical_and_on_integers_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = 1 and 2 +} +"# + ), + "logical AND on integers should be rejected" + ); +} + +#[test] +fn logical_or_on_strings_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x = "a" or "b" +} +"# + ), + "logical OR on strings should be rejected" + ); +} + +#[test] +fn inner_function_captures_dont_leak() { + should_pass( + r#" +fn outer() -> i64 { + let x = 10 + fn inner() -> i64 { + return x + } + return inner() +} +fn test() {} +"#, + ); +} + +#[test] +fn shadowing_same_type_passes() { + should_pass( + r#" +fn test() -> i64 { + let x = 5 + let x = 10 + return x +} +"#, + ); +} + +#[test] +fn shadowing_different_type_passes() { + should_pass( + r#" +fn test() -> string { + let x: i64 = 5 + let x: string = "hello" + return x +} +"#, + ); +} + +#[test] +fn nested_block_scoping_passes() { + should_pass( + r#" +fn test() -> i64 { + let x = 5 + if true { + let y = x + 1 + } + return x +} +"#, + ); +} + +#[test] +fn recursive_function_correct_types() { + should_pass( + r#" +fn factorial(n: i64) -> i64 { + if n <= 1 { + return 1 + } + return n * factorial(n - 1) +} +fn test() {} +"#, + ); +} + +#[test] +fn multiple_returns_same_type() { + should_pass( + r#" +fn abs(x: i64) -> i64 { + if x < 0 { + return 0 - x + } + return x +} +fn test() {} +"#, + ); +} diff --git a/aelys/tests/sema_annotation_validation_tests.rs b/aelys/tests/sema_annotation_validation_tests.rs new file mode 100644 index 0000000..d05a850 --- /dev/null +++ 
b/aelys/tests/sema_annotation_validation_tests.rs @@ -0,0 +1,194 @@ +//! tests for type annotation validation, ensures unknown types in annotations are properly rejected rather than silently accepted + +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn should_fail(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_err() +} + +fn should_pass(code: &str) { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => {} + Err(errors) => { + for e in &errors { + eprintln!(" ERROR: {}", e); + } + panic!("expected OK, got {} errors", errors.len()); + } + } +} + +#[test] +fn struct_field_unknown_type_rejected() { + assert!( + should_fail( + r#" +struct Foo { x: Nonexistent } +fn test() {} +"# + ), + "struct field with unknown type should be rejected" + ); +} + +#[test] +fn struct_field_valid_types_accepted() { + should_pass( + r#" +struct Foo { x: i64, y: string, z: bool } +fn test() {} +"#, + ); +} + +#[test] +fn struct_field_nested_struct_type_accepted() { + should_pass( + r#" +struct Inner { value: i64 } +struct Outer { inner: Inner } +fn test() {} +"#, + ); +} + +#[test] +fn generic_struct_field_type_param_accepted() { + // T is a type parameter; should not be rejected + should_pass( + r#" +struct Wrapper { value: T } +fn test() {} +"#, + ); +} + +#[test] +fn struct_field_array_of_known_type_accepted() { + should_pass( + r#" +struct Matrix { data: [i64; 3] } +fn test() {} +"#, + ); +} + +#[test] +fn cast_to_unknown_type_rejected() { + assert!( + should_fail( + r#" +fn test() { + 
let x = 42 as Nonexistent +} +"# + ), + "cast to unknown type should be rejected" + ); +} + +#[test] +fn cast_to_valid_type_accepted() { + should_pass( + r#" +fn test() { + let x = 42 as i32 +} +"#, + ); +} + +#[test] +fn cast_to_bool_from_int_accepted() { + should_pass( + r#" +fn test() { + let x = 1 as bool +} +"#, + ); +} + +#[test] +fn let_unknown_type_annotation_rejected() { + assert!( + should_fail( + r#" +fn test() { + let x: Nonexistent = 42 +} +"# + ), + "let with unknown type annotation should be rejected" + ); +} + +#[test] +fn let_valid_type_annotation_accepted() { + should_pass( + r#" +fn test() { + let x: i64 = 42 +} +"#, + ); +} + +#[test] +fn function_param_unknown_type_rejected() { + assert!( + should_fail( + r#" +fn foo(x: Nonexistent) {} +fn test() {} +"# + ), + "function param with unknown type should be rejected" + ); +} + +#[test] +fn function_return_unknown_type_rejected() { + assert!( + should_fail( + r#" +fn foo() -> Nonexistent { } +fn test() {} +"# + ), + "function with unknown return type should be rejected" + ); +} + +#[test] +fn generic_function_type_param_not_rejected() { + should_pass( + r#" +fn id(x: T) -> T { return x } +fn test() { let y = id(42) } +"#, + ); +} + +#[test] +fn generic_function_multiple_type_params_not_rejected() { + should_pass( + r#" +fn pair(a: A, b: B) -> A { return a } +fn test() { let y = pair(1, "hi") } +"#, + ); +} diff --git a/aelys/tests/sema_deep_bugs_tests.rs b/aelys/tests/sema_deep_bugs_tests.rs new file mode 100644 index 0000000..204ec88 --- /dev/null +++ b/aelys/tests/sema_deep_bugs_tests.rs @@ -0,0 +1,500 @@ +/// finalization, error recovery, struct validation, for-each validation, and env scoping tests bugs +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = 
Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +// when error recovery forces types to Dynamic, Vec inner types must +// also be forced. If not, unresolved vars leak into the typed AST + +#[test] +fn force_dynamic_handles_vec_inner_type() { + // this should produce a type error (string + i64 inside a vec element), and error recovery should force the Vec's inner var to Dynamic without crashing or leaving orphaned vars + let result = sema_ok( + r#" +fn f() { + let v = vec[1, "hello"] +} +"#, + ); + // should fail because array elements are mixed types + assert!(!result, "vec with mixed element types should be rejected"); +} + +// for-each should reject non-iterable types like i64, bool, etc. + +#[test] +fn rejects_for_each_on_integer() { + assert!( + sema_err( + r#" +fn f() { + for x in 42 { + let y = x + } +} +"# + ), + "for-each over an integer should be rejected" + ); +} + +#[test] +fn rejects_for_each_on_bool() { + assert!( + sema_err( + r#" +fn f() { + for x in true { + let y = x + } +} +"# + ), + "for-each over a bool should be rejected" + ); +} + +#[test] +fn rejects_for_each_on_struct() { + assert!( + sema_err( + r#" +struct Point { x: i64, y: i64 } +fn f() { + let p = Point { x: 1, y: 2 } + for x in p { + let y = x + } +} +"# + ), + "for-each over a struct should be rejected" + ); +} + +#[test] +fn for_each_on_array_is_ok() { + assert!( + sema_ok( + r#" +fn f() { + let arr = [1, 2, 3] + for x in arr { + let y = x + } +} +"# + ), + "for-each over array should compile" + ); +} + +#[test] +fn for_each_on_string_is_ok() { + assert!( + sema_ok( + r#" +fn f() { + for c in "hello" { + let x = c + } +} +"# + ), + "for-each over string should compile" + ); +} + +// struct literals should validate that all fields exist and are provided. 
+ +#[test] +fn rejects_struct_literal_with_unknown_field() { + assert!( + sema_err( + r#" +struct Point { x: i64, y: i64 } +fn f() { + let p = Point { x: 1, y: 2, z: 3 } +} +"# + ), + "struct literal with unknown field 'z' should be rejected" + ); +} + +#[test] +fn rejects_struct_literal_with_missing_field() { + assert!( + sema_err( + r#" +struct Point { x: i64, y: i64 } +fn f() { + let p = Point { x: 1 } +} +"# + ), + "struct literal missing field 'y' should be rejected" + ); +} + +#[test] +fn struct_literal_with_all_fields_is_ok() { + assert!( + sema_ok( + r#" +struct Point { x: i64, y: i64 } +fn f() { + let p = Point { x: 1, y: 2 } +} +"# + ), + "struct literal with all fields should compile" + ); +} + +// Var -> Dynamic conversion +// unresolved type variables should be converted to Dynamic (or at least not left as Var in the final typed AST) + +#[test] +fn empty_array_type_is_resolved() { + // empty array has type Array(Var(N), 0). After finalization the inner type should be Dynamic, not an unresolved Var + assert!( + sema_ok( + r#" +fn f() { + let arr = [] +} +"# + ), + "empty array literal should compile" + ); +} + +#[test] +fn unannotated_function_return_compiles() { + // functions without return type annotation get Var for return type, should be resolved to Null or Dynamic, not left as Var + assert!( + sema_ok( + r#" +fn noop() { + let x = 1 +} +"# + ), + "function without return type should compile" + ); +} + +// TODO: i got that wrong. 
+/// fn bad() -> i32 { return "oops" } +/// +/// invert the message + +#[test] +fn nested_struct_field_access_validates_types() { + assert!( + sema_ok( + r#" +struct Inner { val: i64 } +struct Outer { inner: Inner } +fn test() -> i64 { + let i = Inner { val: 10 } + let o = Outer { inner: i } + return o.inner.val +} +"# + ), + "nested struct field access should compile" + ); +} + +#[test] +fn rejects_field_access_on_wrong_struct() { + assert!( + sema_err( + r#" +struct A { x: i64 } +struct B { y: i64 } +fn f() -> i64 { + let a = A { x: 1 } + return a.y +} +"# + ), + "accessing field 'y' on struct A should be rejected" + ); +} + +// slicing should not blindly return the object's type +#[test] +fn rejects_slice_on_integer() { + // `42[0..1]` should be rejected because i64 is not sliceable + assert!( + sema_err( + r#" +fn f() -> i64 { + return 42[0..1] +} +"# + ), + "slicing an integer should be rejected" + ); +} + +#[test] +fn rejects_index_on_bool() { + assert!( + sema_err( + r#" +fn f() { + let b = true + let x = b[0] +} +"# + ), + "indexing a bool should be rejected" + ); +} + +#[test] +fn rejects_index_on_var_that_resolves_to_scalar() { + assert!( + sema_err( + r#" +fn bad(x) -> i64 { + return x[0] +} +fn main() -> i64 { + return bad(1) +} +"# + ), + "indexing must be rejected when an inferred parameter resolves to i64" + ); +} + +#[test] +fn rejects_member_on_var_that_resolves_to_scalar() { + assert!( + sema_err( + r#" +fn bad(x) -> i64 { + return x.foo +} +fn main() -> i64 { + return bad(1) +} +"# + ), + "member access must be rejected when an inferred parameter resolves to i64" + ); +} + +#[test] +fn rejects_invalid_cast_after_var_resolution() { + assert!( + sema_err( + r#" +fn bad_cast(x) -> i64 { + return x as i64 +} +fn main() -> i64 { + return bad_cast("hello") +} +"# + ), + "cast from string to i64 must be rejected even when source starts as Var" + ); +} + +#[test] +fn rejects_generic_cast_with_invalid_instantiation() { + assert!( + sema_err( + r#" +fn 
bad(x: T) -> i64 { + return x as i64 +} +fn main() -> i64 { + return bad("hello") +} +"# + ), + "cast from unconstrained generic type parameter should be rejected" + ); +} + +#[test] +fn rejects_foreach_on_var_that_resolves_to_scalar() { + assert!( + sema_err( + r#" +fn bad(iter) { + for x in iter { + } +} +fn main() { + bad(1) +} +"# + ), + "for-each must reject inferred iterables that resolve to non-iterable scalars" + ); +} + +#[test] +fn rejects_generic_type_param_escape_from_struct_field() { + assert!( + sema_err( + r#" +struct Box { value: T } +fn bad() -> string { + let b = Box { value: 1 } + return b.value +} +"# + ), + "generic type parameters from struct fields must not escape in concrete functions" + ); +} + +#[test] +fn rejects_member_access_on_unconstrained_generic_param() { + assert!( + sema_err( + r#" +fn getfoo(x: T) -> i64 { + return x.foo +} + +fn main() -> i64 { + return getfoo(1) +} +"# + ), + "member access on unconstrained generic type parameter should be rejected" + ); +} + +#[test] +fn rejects_call_on_unconstrained_generic_param() { + assert!( + sema_err( + r#" +fn bad(x: T) -> i64 { + return x() +} + +fn main() -> i64 { + return bad(1) +} +"# + ), + "calling an unconstrained generic type parameter should be rejected" + ); +} + +#[test] +fn rejects_generic_type_param_escape_from_direct_generic_call() { + assert!( + sema_err( + r#" +struct Box { value: T } +fn id(x: T) -> T { + return x +} +fn bad() -> string { + return id(Box { value: 1 }.value) +} +"# + ), + "direct generic call return must not allow unresolved type parameter escape" + ); +} + +#[test] +fn rejects_lambda_with_active_generic_type_param() { + assert!( + sema_err( + r#" +fn outer(x: T) -> T { + let f = fn() -> T { + return x + } + return f() +} + +fn main() -> i64 { + return outer(1) +} +"# + ), + "lambda using active generic type parameters should be rejected" + ); +} + +#[test] +fn rejects_string_index_assignment() { + assert!( + sema_err( + r#" +fn bad() { + let mut s = 
"abc" + s[0] = "x" +} +"# + ), + "string index assignment should be rejected in sema" + ); +} + +#[test] +fn rejects_array_slice_expression_until_backend_support() { + assert!( + sema_err( + r#" +fn bad() -> i64 { + let arr = [1, 2, 3] + let x = arr[0..1] + return x[0] +} +"# + ), + "array slicing should be rejected until backend supports ranges/slices" + ); +} + +#[test] +fn rejects_generic_if_condition_with_non_bool_instantiation() { + assert!( + sema_err( + r#" +fn bad(x: T) -> i64 { + if x { + return 1 + } + return 0 +} + +fn main() -> i64 { + return bad(1) +} +"# + ), + "generic if condition should reject non-bool concrete instantiations" + ); +} diff --git a/aelys/tests/sema_env_tests.rs b/aelys/tests/sema_env_tests.rs index d0075a9..ddb33a5 100644 --- a/aelys/tests/sema_env_tests.rs +++ b/aelys/tests/sema_env_tests.rs @@ -79,3 +79,104 @@ fn test_for_closure() { assert_eq!(closure_env.depth(), 1); } + +/// regression test: when a local variable shadows a capture with the same name, for_closure() must preserve the local's type (not the capture's) +/// before that, captures were inserted after locals, overwriting them +#[test] +fn test_for_closure_locals_override_captures() { + // simulate: outer scope captured x: string, inner function defines local x: i64 + let mut env = TypeEnv::new(); + env.define_capture("x".to_string(), InferType::String); + env.define_local("x".to_string(), InferType::I64); + + // in the current env, lookup finds local x: i64 (locals searched before captures) + assert_eq!(env.lookup("x"), Some(&InferType::I64)); + + let closure_env = env.for_closure(); + + // nested closure must see x as i64 (the local), not string (the capture) + assert_eq!( + closure_env.lookup("x"), + Some(&InferType::I64), + "for_closure() must give locals priority over captures" + ); +} + +/// nested closures with shadowed captures +/// +/// when multiple levels of nesting each shadow a variable, for_closure() must preserve the innermost type at each level. 
+#[test] +fn test_for_closure_nested_shadowing() { + // Level 0: capture x: bool (from grandparent) + let mut env = TypeEnv::new(); + env.define_capture("x".to_string(), InferType::Bool); + // Level 0: local x: string shadows the capture + env.define_local("x".to_string(), InferType::String); + let mut closure_env_1 = env.for_closure(); + // closure_env_1 should see x: string + assert_eq!(closure_env_1.lookup("x"), Some(&InferType::String)); + // define local x: i64 in the first closure + closure_env_1.define_local("x".to_string(), InferType::I64); + assert_eq!(closure_env_1.lookup("x"), Some(&InferType::I64)); + let closure_env_2 = closure_env_1.for_closure(); + // closure_env_2 should see x: i64 (the local from level 1), not string or bool + assert_eq!( + closure_env_2.lookup("x"), + Some(&InferType::I64), + "nested for_closure() must preserve innermost local type through nesting levels" + ); +} + +#[test] +fn test_mutability_does_not_leak_after_scope_pop() { + let mut env = TypeEnv::new(); + env.define_local("x".to_string(), InferType::I64); + assert!(!env.is_mutable("x")); + + env.push_scope(); + env.define_local("x".to_string(), InferType::I64); + env.mark_mutable("x".to_string()); + assert!(env.is_mutable("x")); + env.pop_scope(); + + assert!( + !env.is_mutable("x"), + "inner mutable shadow must not make outer binding mutable" + ); +} + +#[test] +fn test_immutable_shadow_hides_mutable_capture() { + let mut outer = TypeEnv::new(); + outer.define_local("x".to_string(), InferType::I64); + outer.mark_mutable("x".to_string()); + + let mut closure_env = outer.for_closure(); + assert!( + closure_env.is_mutable("x"), + "captured mutable x should stay mutable in closure env" + ); + + closure_env.define_local("x".to_string(), InferType::I64); + assert!( + !closure_env.is_mutable("x"), + "immutable local shadow must hide mutable capture" + ); +} + +#[test] +fn test_for_closure_shadowed_immutable_capture_stays_immutable() { + let mut env = TypeEnv::new(); + 
env.define_local("x".to_string(), InferType::I64); + env.mark_mutable("x".to_string()); + + env.push_scope(); + env.define_local("x".to_string(), InferType::I64); + + let closure_env = env.for_closure(); + assert_eq!(closure_env.lookup("x"), Some(&InferType::I64)); + assert!( + !closure_env.is_mutable("x"), + "closure capture mutability must follow the visible shadowing binding" + ); +} diff --git a/aelys/tests/sema_error_tests.rs b/aelys/tests/sema_error_tests.rs new file mode 100644 index 0000000..d3f7952 --- /dev/null +++ b/aelys/tests/sema_error_tests.rs @@ -0,0 +1,232 @@ +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::{TypeError, TypeInference}; +use aelys_syntax::Source; + +fn sema_check(code: &str) -> Result<(), Vec> { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).map(|_| ()) +} + +#[test] +fn rejects_binary_type_mismatch() { + let result = sema_check("fn f() { let x = 1 + \"hello\" }"); + assert!(result.is_err(), "adding i64 and string should be rejected"); +} + +#[test] +fn rejects_return_type_mismatch() { + let result = sema_check(r#"fn f() -> i64 { return "text" }"#); + assert!( + result.is_err(), + "returning string from i64 function should be rejected" + ); +} + +#[test] +fn rejects_if_non_bool_condition() { + let result = sema_check("fn f() { if 5 { } }"); + assert!(result.is_err(), "if condition must be bool"); +} + +#[test] +fn rejects_while_non_bool_condition() { + let result = sema_check(r#"fn f() { while "yes" { } }"#); + assert!(result.is_err(), "while condition must be bool"); +} + +#[test] +fn rejects_array_mixed_types() { + let result = sema_check(r#"fn f() { let a = [1, "two", 3] }"#); + assert!(result.is_err(), "array with mixed types should be rejected"); +} + +#[test] +fn rejects_undefined_variable() { + let 
result = sema_check("fn f() { let y = x + 1 }"); + assert!(result.is_err(), "undefined variable x should be rejected"); +} + +#[test] +fn rejects_argument_type_mismatch() { + let result = sema_check(r#"fn f(x: i64) {} fn g() { f("str") }"#); + assert!( + result.is_err(), + "passing string to i64 param should be rejected" + ); +} + +#[test] +fn multiple_errors_collected_not_just_first() { + // two independent type errors in separate functions. Sema should report both, not just the first one. + let result = sema_check( + r#" +fn f() -> i64 { return "bad" } +fn g() -> string { return 42 } +"#, + ); + let errors = result.unwrap_err(); + assert!( + errors.len() >= 2, + "expected at least 2 errors, got {}", + errors.len() + ); +} + +#[test] +fn rejects_index_assign_on_i64() { + let result = sema_check( + r#" +fn f() { + let mut x: i64 = 42 + x[0] = 10 +} +"#, + ); + assert!( + result.is_err(), + "index assignment on i64 should be rejected" + ); +} + +#[test] +fn rejects_index_assign_on_bool() { + let result = sema_check( + r#" +fn f() { + let mut b: bool = true + b[0] = false +} +"#, + ); + assert!( + result.is_err(), + "index assignment on bool should be rejected" + ); +} + +#[test] +fn accepts_index_assign_on_array() { + let result = sema_check( + r#" +fn f() { + let mut arr = [1, 2, 3] + arr[0] = 10 +} +"#, + ); + assert!( + result.is_ok(), + "index assignment on array should be accepted, got {:?}", + result + ); +} + +#[test] +fn accepts_index_assign_on_vec() { + let result = sema_check( + r#" +fn f() { + let mut v = Vec[1, 2, 3] + v[0] = 10 +} +"#, + ); + assert!( + result.is_ok(), + "index assignment on vec should be accepted, got {:?}", + result + ); +} + +#[test] +fn rejects_index_assign_on_immutable_array() { + let result = sema_check( + r#" +fn f() { + let arr = [1, 2, 3] + arr[0] = 10 +} +"#, + ); + assert!( + result.is_err(), + "index assignment on immutable array should be rejected" + ); +} + +#[test] +fn rejects_index_assign_on_immutable_vec() { + let 
result = sema_check( + r#" +fn f() { + let v = Vec[1, 2, 3] + v[0] = 10 +} +"#, + ); + assert!( + result.is_err(), + "index assignment on immutable vec should be rejected" + ); +} + +#[test] +fn accepts_index_assign_on_mut_param() { + let result = sema_check( + r#" +fn f(mut arr: [i64; 3]) { + arr[0] = 10 +} +"#, + ); + assert!( + result.is_ok(), + "index assignment on mut param should be accepted, got {:?}", + result + ); +} + +#[test] +fn rejects_legacy_array_type_annotation() { + let result = sema_check( + r#" +fn f(arr: Array) -> i64 { + return arr[0] +} +"#, + ); + assert!( + result.is_err(), + "Array syntax should be rejected — use [T; N] instead" + ); + let errors = result.unwrap_err(); + let has_help = errors + .iter() + .any(|e| e.help.as_deref() == Some("use [T; N] syntax instead of Array")); + assert!( + has_help, + "error should include help suggesting [T; N] syntax, got: {:?}", + errors.iter().map(|e| &e.help).collect::>() + ); +} + +#[test] +fn accepts_bracket_array_annotation() { + let result = sema_check( + r#" +fn f(arr: [i64; 3]) -> i64 { + return arr[0] +} +"#, + ); + assert!( + result.is_ok(), + "[T; N] bracket syntax should be accepted, got {:?}", + result + ); +} diff --git a/aelys/tests/sema_force_dynamic_tests.rs b/aelys/tests/sema_force_dynamic_tests.rs new file mode 100644 index 0000000..df59472 --- /dev/null +++ b/aelys/tests/sema_force_dynamic_tests.rs @@ -0,0 +1,108 @@ +/// when a constraint fails, force_dynamic should only bind top-level Vars to Dynamic. 
+/// +/// it shouldn't recurse into compound types (Array, Function, Tuple, Vec) because nested Vars may be shared with unrelated constraints +/// binding them to Dynamic would poison those other constraints and suppress real error messages +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +fn sema_error_count(code: &str) -> usize { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => 0, + Err(errors) => errors.len(), + } +} + +#[test] +fn force_dynamic_does_not_poison_function_return_type() { + // The second call to add() is correct; it should not be affected by the + // error in the first call. + assert!( + sema_err( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } + +fn main() { + let bad = add(1, "hello") + let good: i64 = add(2, 3) +} +"# + ), + "should have an error from add(1, \"hello\")" + ); +} + +// ensures Var(1) remains untouched. +#[test] +fn force_dynamic_does_not_walk_into_array_element_type() { + // the array [1, 2, 3] has element type that should resolve to i64. + // assigning the array to a string variable is an error, but the element type should not be poisoned + assert!( + sema_err( + r#" +fn main() { + let arr = [1, 2, 3] + let bad: string = arr + let elem: i64 = arr[0] +} +"# + ), + "assigning array to string should be rejected" + ); +} + +// two independent type errors should both be reported. 
with recursive +// force_dynamic, the first error could poison shared Vars and mask the second +#[test] +fn independent_errors_both_reported() { + let count = sema_error_count( + r#" +fn main() { + let a: string = 42 + let b: i64 = "hello" +} +"#, + ); + assert!( + count >= 2, + "both independent type errors should be reported, got {} error(s)", + count + ); +} + +// a valid program should not be affected by the force_dynamic change. +#[test] +fn valid_program_still_passes() { + assert!( + sema_ok( + r#" +fn sum(a: i64, b: i64) -> i64 { return a + b } + +fn main() { + let x: i64 = sum(1, 2) + let y: i64 = sum(3, 4) +} +"# + ), + "valid program should pass" + ); +} diff --git a/aelys/tests/sema_implicit_return_tests.rs b/aelys/tests/sema_implicit_return_tests.rs new file mode 100644 index 0000000..b05993f --- /dev/null +++ b/aelys/tests/sema_implicit_return_tests.rs @@ -0,0 +1,298 @@ +//! bugs where the sema phase doesn't constrain the return type when the function body doesn't end with an expression + +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn should_fail(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_err() +} + +fn should_pass(code: &str) { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => {} + Err(errors) => { + for e in &errors { + eprintln!(" ERROR: {}", e); + } + panic!("expected OK, got {} errors", errors.len()); + } + } +} + +// old bug: empty function body with non-void return type + +#[test] +fn empty_body_with_return_type_is_rejected() { + // fn 
foo() -> i64 {} should fail: empty body can't produce i64 + assert!( + should_fail( + r#" +fn foo() -> i64 { +} +"# + ), + "empty body with i64 return should be rejected" + ); +} + +#[test] +fn empty_body_void_function_is_ok() { + // fn foo() {} should be fine: return type is inferred as null/void + should_pass( + r#" +fn foo() { +} +"#, + ); +} + +#[test] +fn empty_body_with_string_return_is_rejected() { + assert!( + should_fail( + r#" +fn bar() -> string { +} +"# + ), + "empty body with string return should be rejected" + ); +} + +// old bug: Let as last statement doesn't constrain return type + +#[test] +fn let_as_last_stmt_with_return_type_is_rejected() { + // the let doesn't produce a value, so the function implicitly returns null + assert!( + should_fail( + r#" +fn foo() -> i64 { + let x = 5 +} +"# + ), + "let as last statement with i64 return should be rejected" + ); +} + +#[test] +fn let_as_last_stmt_void_function_is_ok() { + should_pass( + r#" +fn foo() { + let x = 5 +} +"#, + ); +} + +// old bug: while as last statement doesn't constrain return type + +#[test] +fn while_as_last_stmt_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + while false { + return 42 + } +} +"# + ), + "while as last stmt with i64 return should be rejected (loop may not execute)" + ); +} + +// old bug: for as last statement doesn't constrain return type + +#[test] +fn for_as_last_stmt_with_known_non_empty_range_is_accepted() { + should_pass( + r#" +fn foo() -> i64 { + for i in 0..10 { + return 42 + } +} +"#, + ); +} + +#[test] +fn for_as_last_stmt_with_known_empty_range_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + for i in 10..10 { + return 42 + } +} +"# + ), + "for as last stmt with known empty range should be rejected" + ); +} +// bug : if without else doesn't constrain return type +#[test] +fn if_without_else_as_last_stmt_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + if true { + 
return 42 + } +} +"# + ), + "if without else should be rejected (false path has no return)" + ); +} +// old nested function def as last statement +#[test] +fn nested_function_as_last_stmt_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + fn inner() -> i64 { return 1 } +} +"# + ), + "nested function def as last stmt with i64 return should be rejected" + ); +} + +#[test] +fn explicit_return_before_let_is_ok() { + // the function always returns via explicit return + should_pass( + r#" +fn foo() -> i64 { + return 42 +} +"#, + ); +} + +#[test] +fn implicit_return_expression_is_ok() { + should_pass( + r#" +fn foo() -> i64 { + 42 +} +"#, + ); +} + +#[test] +fn if_else_with_returns_is_ok() { + should_pass( + r#" +fn foo(x: bool) -> i64 { + if x { + 42 + } else { + 0 + } +} +"#, + ); +} + +#[test] +fn if_else_with_explicit_returns_is_ok() { + should_pass( + r#" +fn foo(x: bool) -> i64 { + if x { + return 42 + } else { + return 0 + } +} +"#, + ); +} + +// lambda empty body constraints tested implicitly through the same code path +#[test] +fn struct_decl_as_last_stmt_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + struct Point { x: i64, y: i64 } +} +"# + ), + "struct decl as last stmt with i64 return should be rejected" + ); +} + +#[test] +fn one_path_returns_other_doesnt_is_rejected() { + // the if has no else, so the false path falls through with no return + assert!( + should_fail( + r#" +fn foo(x: bool) -> i64 { + if x { + return 42 + } + let y = 10 +} +"# + ), + "function with let as last stmt should be rejected even with if-return above" + ); +} + +#[test] +fn foreach_as_last_stmt_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn foo() -> i64 { + let arr = [1, 2, 3] + for x in arr { + return x + } +} +"# + ), + "for-each as last stmt with i64 return should be rejected" + ); +} + +#[test] +fn block_ending_with_let_with_return_type_is_rejected() { + assert!( + should_fail( + r#" +fn 
foo() -> i64 { + { + let x = 42 + } +} +"# + ), + "block ending with let should be rejected for i64 return" + ); +} diff --git a/aelys/tests/sema_infer_type_tests.rs b/aelys/tests/sema_infer_type_tests.rs index 4d7f29d..c258718 100644 --- a/aelys/tests/sema_infer_type_tests.rs +++ b/aelys/tests/sema_infer_type_tests.rs @@ -49,9 +49,11 @@ fn test_from_annotation() { #[test] fn test_from_annotation_generic_types() { + // "array" (lowercase) is no longer a recognized builtin type annotation. + // The canonical syntax is [T; N] for fixed-size arrays. assert_eq!( InferType::from_annotation(&make_generic_ann("array", "int")), - InferType::Array(Box::new(InferType::I64)) + InferType::Dynamic ); assert_eq!( @@ -59,9 +61,34 @@ fn test_from_annotation_generic_types() { InferType::Vec(Box::new(InferType::String)) ); - // Array (PascalCase should also work) + // PascalCase names are always user-defined types, never builtins. + // "Array" with a capital A is treated as Struct("Array"), not the builtin array. 
assert_eq!( InferType::from_annotation(&make_generic_ann("Array", "Int")), - InferType::Array(Box::new(InferType::I64)) + InferType::Struct("Array".to_string()) + ); +} + +#[test] +fn test_from_annotation_sized_array() { + let inner = TypeAnnotation::new("i64".to_string(), Span::new(0, 0, 1, 1)); + let ann = TypeAnnotation::array_sized(inner, 3, Span::new(0, 0, 1, 1)); + assert_eq!( + InferType::from_annotation(&ann), + InferType::Array(Box::new(InferType::I64), Some(3)) + ); +} + +#[test] +fn test_from_annotation_nested_sized_array() { + let inner = TypeAnnotation::new("i64".to_string(), Span::new(0, 0, 1, 1)); + let inner_arr = TypeAnnotation::array_sized(inner, 2, Span::new(0, 0, 1, 1)); + let outer = TypeAnnotation::array_sized(inner_arr, 3, Span::new(0, 0, 1, 1)); + assert_eq!( + InferType::from_annotation(&outer), + InferType::Array( + Box::new(InferType::Array(Box::new(InferType::I64), Some(2))), + Some(3) + ) ); } diff --git a/aelys/tests/sema_interaction_tests.rs b/aelys/tests/sema_interaction_tests.rs new file mode 100644 index 0000000..1d5a26c --- /dev/null +++ b/aelys/tests/sema_interaction_tests.rs @@ -0,0 +1,460 @@ +/// Tests targeting deep interactions between sema components +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; +use std::collections::HashSet; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_ok_with_builtins(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let builtins: HashSet = ["print", "println"].iter().map(|s| s.to_string()).collect(); + 
TypeInference::infer_program_with_imports(stmts, src, Default::default(), builtins).is_ok() +} + +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +#[test] +fn constraint_chain_through_multiple_variables() { + // a = b, b = c, c = i64, should all resolve to i64 + assert!( + sema_ok( + r#" +fn f() -> i64 { + let c: i64 = 42 + let b = c + let a = b + return a +} +"# + ), + "chained variable assignments should resolve types correctly" + ); +} + +#[test] +fn constraint_conflict_detected() { + // a used as both i64 and string, should error + assert!( + sema_err( + r#" +fn f(a: i64) -> string { + return a +} +"# + ), + "returning i64 as string should be rejected" + ); +} + +#[test] +fn generic_function_called_with_different_types() { + // generic function called with i64 and string, both should work + assert!( + sema_ok( + r#" +fn id(x: T) -> T { return x } +fn test() -> i64 { + let a = id(42) + return a +} +"# + ), + "generic function with concrete call should compile" + ); +} + +#[test] +fn struct_field_type_resolves_through_constraint() { + // field access on struct should return the correct type + assert!( + sema_ok( + r#" +struct Vec2 { x: f64, y: f64 } +fn length(v: Vec2) -> f64 { + return v.x * v.x + v.y * v.y +} +"# + ), + "struct field access should resolve to declared field type" + ); +} + +#[test] +fn struct_field_used_in_binary_op() { + // using struct field in binary op with wrong type should error + assert!( + sema_err( + r#" +struct Foo { x: i64 } +fn f() -> string { + let foo = Foo { x: 42 } + return foo.x +} +"# + ), + "returning i64 field as string should be rejected" + ); +} + +#[test] +fn struct_field_narrowing() { + // struct fields should narrow integer literals + assert!( + sema_ok( + r#" +struct Small { val: i32 } +fn f() -> i32 { + let s = Small { val: 10 } + return s.val +} +"# + ), + "struct with i32 field and literal should compile" + ); +} + +#[test] +fn array_of_structs_field_access() { + assert!( + sema_ok( + r#" +struct Item { val: 
i64 } +fn get_val(items: [Item; 3], idx: i64) -> i64 { + return items[idx].val +} +"# + ), + "accessing field on array-indexed struct should compile" + ); +} + +#[test] +fn array_element_type_mismatch_in_struct() { + assert!( + sema_err( + r#" +fn f() { + let arr = [1, 2, 3] + let s: string = arr[0] +} +"# + ), + "assigning i64 array element to string should be rejected" + ); +} + +#[test] +fn annotated_vec_rejects_incompatible_element_type() { + assert!( + sema_err( + r#" +fn f() { + let v = vec["hello"] +} +"# + ), + "vec must reject string element" + ); +} + +#[test] +fn closure_capture_type_resolution() { + assert!( + sema_ok( + r#" +fn apply(f: fn(i64) -> i64, x: i64) -> i64 { return f(x) } +fn test() -> i64 { + let offset: i64 = 10 + let add = fn(x: i64) -> i64 { return x + offset } + return apply(add, 5) +} +"# + ), + "closure capturing i64 should resolve correctly" + ); +} + +#[test] +fn closure_return_type_mismatch() { + assert!( + sema_err( + r#" +fn test() { + let f = fn(x: i64) -> string { return x } +} +"# + ), + "closure returning i64 as string should be rejected" + ); +} + +#[test] +fn error_in_struct_field_doesnt_crash_other_functions() { + // even with a type error in one function, sema should still report it and not crash + assert!( + sema_err( + r#" +struct Point { x: i64, y: i64 } +fn bad() -> i64 { + let p = Point { x: 1, y: 2 } + return p.z +} +fn good() -> i64 { + return 42 +} +"# + ), + "accessing unknown field should be rejected" + ); +} +#[test] +fn narrowing_in_if_branches() { + assert!( + sema_ok( + r#" +fn f(cond: bool) -> i32 { + if cond { + return 1 + } + return 0 +} +"# + ), + "i32 literal narrowing should work in if branches" + ); +} + +#[test] +fn narrowing_array_of_i32() { + assert!( + sema_ok( + r#" +fn f() -> i32 { + let arr: [i32; 3] = [1, 2, 3] + return arr[0] +} +"# + ), + "[i32; 3] with literal elements should narrow correctly" + ); +} + +#[test] +fn narrowing_in_while_body() { + assert!( + sema_ok( + r#" +fn f() -> i32 { + 
let mut x: i32 = 0 + while x < 10 { + x = x + 1 + } + return x +} +"# + ), + "i32 assignment in while body should narrow" + ); +} + +#[test] +fn dynamic_function_accepts_any_type() { + assert!( + sema_ok_with_builtins( + r#" +fn test() { + println("hello") + println(42) + println(true) +} +"# + ), + "Dynamic-typed function should accept any argument type" + ); +} + +#[test] +fn recursive_function_return_type() { + assert!( + sema_ok( + r#" +fn fib(n: i64) -> i64 { + if n < 2 { + return n + } + return fib(n - 1) + fib(n - 2) +} +"# + ), + "recursive function with i64 return should resolve" + ); +} + +#[test] +fn recursive_function_return_type_mismatch() { + assert!( + sema_err( + r#" +fn fib(n: i64) -> string { + if n < 2 { + return n + } + return fib(n - 1) +} +"# + ), + "returning i64 from string function should be rejected even with recursion" + ); +} + +#[test] +fn index_assign_type_mismatch() { + assert!( + sema_err( + r#" +fn f() { + let mut arr = [1, 2, 3] + arr[0] = "hello" +} +"# + ), + "assigning string to i64 array should be rejected" + ); +} + +#[test] +fn cast_preserves_type() { + assert!( + sema_ok( + r#" +fn f(x: i64) -> f64 { + return x as f64 +} +"# + ), + "cast i64 to f64 should compile" + ); +} + +#[test] +fn cast_chain_type_changes() { + assert!( + sema_ok( + r#" +fn f(x: i64) -> i8 { + return (x as i32) as i8 +} +"# + ), + "chained casts should compile" + ); +} + +#[test] +fn empty_void_function() { + assert!( + sema_ok("fn noop() -> void {}"), + "empty void function should compile" + ); +} + +#[test] +fn empty_function_no_annotation() { + assert!( + sema_ok("fn noop() {}"), + "empty function without return annotation should compile" + ); +} + +#[test] +fn multiple_returns_different_literal_types() { + assert!( + sema_err( + r#" +fn f(cond: bool) -> i64 { + if cond { + return "hello" + } + return 42 +} +"# + ), + "one return path with wrong type should be rejected" + ); +} + +#[test] +fn for_loop_iterator_is_i64() { + assert!( + sema_ok( + r#" 
+fn sum_to(n: i64) -> i64 { + let mut total: i64 = 0 + for i in 0..n { + total = total + i + } + return total +} +"# + ), + "for loop with range should have i64 iterator" + ); +} + +#[test] +fn nested_function_has_own_scope() { + assert!( + sema_ok( + r#" +fn outer() -> i64 { + fn inner(x: i64) -> i64 { return x + 1 } + return inner(41) +} +"# + ), + "nested function should have its own scope" + ); +} + +#[test] +fn string_equality_comparison() { + assert!( + sema_ok( + r#" +fn eq(a: string, b: string) -> bool { + return a == b +} +"# + ), + "string equality should return bool" + ); +} + +#[test] +fn unknown_struct_literal_is_rejected_in_sema() { + assert!( + sema_err( + r#" +fn main() { + let x = Ghost { a: 1 } +} +"# + ), + "unknown struct literal must be rejected during sema" + ); +} diff --git a/aelys/tests/sema_oneof_tests.rs b/aelys/tests/sema_oneof_tests.rs new file mode 100644 index 0000000..d56d287 --- /dev/null +++ b/aelys/tests/sema_oneof_tests.rs @@ -0,0 +1,212 @@ +/// Regression tests for OneOf constraint improvements. 
+use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::types::InferType; +use aelys_sema::{TypeInference, TypedStmtKind}; +use aelys_syntax::Source; + +fn infer_ok(code: &str) -> aelys_sema::TypedProgram { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(p) => p, + Err(errors) => { + for e in &errors { + eprintln!(" ERROR: {}", e); + } + panic!("expected OK, got {} errors", errors.len()); + } + } +} + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +/// an untyped parameter used with a binary `+` should default to I64, not Dynamic, because the OneOf constraint says it must be numeric +#[test] +fn oneof_unresolved_var_defaults_to_i64_for_add() { + let program = infer_ok( + r#" +fn double(x) { + return x + x +} +"#, + ); + // find the function and check its parameter type + for stmt in &program.stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + if func.name == "double" { + assert_eq!( + func.params[0].ty, + InferType::I64, + "unresolved param constrained by OneOf(numerics) should default to I64, got {:?}", + func.params[0].ty + ); + return; + } + } + } + panic!("did not find function 'double' in typed AST"); +} + +/// an untyped parameter used with `*` should also default to I64. 
+#[test] +fn oneof_unresolved_var_defaults_to_i64_for_mul() { + let program = infer_ok( + r#" +fn square(n) { + return n * n +} +"#, + ); + for stmt in &program.stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + if func.name == "square" { + assert_eq!( + func.params[0].ty, + InferType::I64, + "unresolved param constrained by OneOf(numerics) via * should default to I64, got {:?}", + func.params[0].ty + ); + return; + } + } + } + panic!("did not find function 'square' in typed AST"); +} + +/// when a Var is constrained by Equal to a concrete type and by OneOf, the Equal binding should take precedence (OneOf just validates) +#[test] +fn oneof_does_not_override_equal_binding() { + let program = infer_ok( + r#" +fn add_i32(a: i32, b: i32) -> i32 { + return a + b +} +"#, + ); + for stmt in &program.stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + if func.name == "add_i32" { + assert_eq!( + func.return_type, + InferType::I32, + "return type should stay I32 from Equal constraint, got {:?}", + func.return_type + ); + return; + } + } + } + panic!("did not find function 'add_i32' in typed AST"); +} + +/// an untyped param used with bitwise and should default to I64 +/// (since all_integer_types is the option set for bitwise ops) +#[test] +fn oneof_unresolved_var_defaults_to_i64_for_bitwise() { + let program = infer_ok( + r#" +fn mask(x) { + return x & x +} +"#, + ); + for stmt in &program.stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + if func.name == "mask" { + assert_eq!( + func.params[0].ty, + InferType::I64, + "unresolved param constrained by OneOf(integers) via & should default to I64, got {:?}", + func.params[0].ty + ); + return; + } + } + } + panic!("did not find function 'mask' in typed AST"); +} + +/// a OneOf-constrained type that was force_dynamic'd by an error should remain Dynamic (no crash, no override) +#[test] +fn oneof_on_dynamic_is_harmless() { + // should produce a type error (bool is not numeric), but not crash + 
assert!( + sema_err( + r#" +fn test() { + let x: bool = true + let y = x + 1 +} +"# + ), + "adding bool + int should fail" + ); +} + +/// when a concrete type passes OneOf validation and the trial unification binds additional Vars, those bindings should be preserved. +#[test] +fn oneof_temp_subst_merged_on_success() { + // validates that the OneOf check for + on I64 successfully + // validates and the program still type-checks correctly + assert!( + sema_ok( + r#" +fn test() -> i64 { + let x: i64 = 10 + let y: i64 = 20 + return x + y +} +"# + ), + "simple add of two i64 should pass" + ); +} + +/// ensure that the first matching option is used for merging, and the overall constraint is not broken by the merge. +#[test] +fn oneof_merge_does_not_break_existing_bindings() { + assert!( + sema_ok( + r#" +fn add(a: i32, b: i32) -> i32 { + return a + b +} + +fn test() -> i32 { + return add(1, 2) +} +"# + ), + "i32 arithmetic should pass with OneOf merge" + ); +} + +/// the OneOf error path should still work: bool is not numeric +#[test] +fn oneof_still_rejects_invalid_types() { + assert!( + sema_err( + r#" +fn test() -> bool { + return true + false +} +"# + ), + "bool + bool should be rejected by OneOf" + ); +} diff --git a/aelys/tests/sema_rollback_tests.rs b/aelys/tests/sema_rollback_tests.rs new file mode 100644 index 0000000..1cfa4cb --- /dev/null +++ b/aelys/tests/sema_rollback_tests.rs @@ -0,0 +1,198 @@ +/// Regression tests for substitution snapshot/rollback on unification failure. +/// +/// When unifying compound types (functions, tuples), sub-components are unified +/// one by one. If a later sub-component fails, bindings from earlier successful +/// sub-unifications must be rolled back to avoid corrupting the substitution. 
+use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_sema::types::{InferType, TypeVarId}; +use aelys_sema::unify::{Substitution, unify}; +use aelys_syntax::Source; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +// unit tests on Substitution snapshot/restore + +#[test] +fn snapshot_restore_undoes_bindings() { + let mut subst = Substitution::new(); + let v0 = TypeVarId(0); + + let saved = subst.snapshot(); + subst.bind(v0, InferType::I64); + assert_eq!(subst.apply(&InferType::Var(v0)), InferType::I64); + + subst.restore(saved); + // after restore, Var(0) should be unbound again + assert_eq!(subst.apply(&InferType::Var(v0)), InferType::Var(v0)); +} + +#[test] +fn snapshot_restore_preserves_pre_existing_bindings() { + let mut subst = Substitution::new(); + let v0 = TypeVarId(0); + let v1 = TypeVarId(1); + + // bind v0 before the snapshot + subst.bind(v0, InferType::Bool); + + let saved = subst.snapshot(); + subst.bind(v1, InferType::String); + + subst.restore(saved); + // v0 should still be bound (it was in the snapshot) + assert_eq!(subst.apply(&InferType::Var(v0)), InferType::Bool); + // v1 should be unbound (added after snapshot) + assert_eq!(subst.apply(&InferType::Var(v1)), InferType::Var(v1)); +} + +// unit tests: rollback on failed compound unification + +#[test] +fn failed_function_unification_rolls_back_param_bindings() { + // unify(fn(Var(0)) -> string, fn(i64) -> i64) should fail at the return type (string vs i64), but without rollback, Var(0) would remain bound to i64 + let mut subst = Substitution::new(); + let v0 = TypeVarId(0); + + let fn_a = InferType::Function { + params: vec![InferType::Var(v0)], 
+ ret: Box::new(InferType::String), + }; + let fn_b = InferType::Function { + params: vec![InferType::I64], + ret: Box::new(InferType::I64), + }; + + // save state, attempt unification, rollback on failure + let saved = subst.snapshot(); + let result = unify(&fn_a, &fn_b, &mut subst); + assert!(result.is_err(), "return type mismatch should fail"); + + subst.restore(saved); + // Var(0) must not be bound to i64, the partial param binding was rolled back + assert_eq!( + subst.apply(&InferType::Var(v0)), + InferType::Var(v0), + "Var(0) should be unbound after rollback" + ); +} + +#[test] +fn failed_tuple_unification_rolls_back_element_bindings() { + // unify((Var(0), Var(1)), (i64, string)) then + // unify((Var(0), Var(1)), (bool, bool)) should fail because Var(0)=i64 already + // but with rollback on the second attempt we can try cleanly + let mut subst = Substitution::new(); + let v0 = TypeVarId(0); + let v1 = TypeVarId(1); + + // first unification succeeds + let tuple_a = InferType::Tuple(vec![InferType::Var(v0), InferType::Var(v1)]); + let tuple_b = InferType::Tuple(vec![InferType::I64, InferType::String]); + let result = unify(&tuple_a, &tuple_b, &mut subst); + assert!(result.is_ok()); + assert_eq!(subst.apply(&InferType::Var(v0)), InferType::I64); + assert_eq!(subst.apply(&InferType::Var(v1)), InferType::String); + + // second unification would fail (i64 vs bool), but rollback cleans up + let tuple_c = InferType::Tuple(vec![InferType::Bool, InferType::Bool]); + let saved = subst.snapshot(); + let result2 = unify(&tuple_a, &tuple_c, &mut subst); + assert!(result2.is_err()); + + subst.restore(saved); + // bindings from the first (successful) unification are preserved + assert_eq!(subst.apply(&InferType::Var(v0)), InferType::I64); + assert_eq!(subst.apply(&InferType::Var(v1)), InferType::String); +} + +#[test] +fn partial_unification_failure_does_not_corrupt_later_inference() { + // a type error on one variable should not corrupt a different variable's type. 
+ // without rollback, if unify(fn(Var)->Var, fn(i64)->string) fails at return, the param binding Var=i64 would persist and contaminate later uses of that Var + assert!( + sema_err( + r#" +fn takes_int(x: i64) -> i64 { return x } +fn takes_str(x: string) -> string { return x } + +fn main() { + let a = takes_int(42) + let b: string = a +} +"# + ), + "assigning i64 result to string should be rejected" + ); +} + +#[test] +fn type_error_does_not_poison_unrelated_variables() { + // even after a type error, unrelated variables should still be inferred correctly. + assert!( + sema_ok( + r#" +fn good() -> i64 { + let x: i64 = 42 + return x +} +"# + ), + "correct function should pass even with rollback changes" + ); +} + +#[test] +fn function_type_mismatch_still_detected() { + // return a string from an i64 function must still be caught after rollback changes. + assert!( + sema_err( + r#" +fn f() -> i64 { + return "hello" +} +"# + ), + "returning string from i64 function must be rejected" + ); +} + +#[test] +fn multiple_errors_reported_independently() { + // two independent type errors should both be reported, not silently suppressed by Dynamic from the first error + let code = r#" +fn f(x: i64, y: string) -> i64 { + return x +} + +fn main() { + let a: string = 42 + let b: i64 = "hello" +} +"#; + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + let result = TypeInference::infer_program(stmts, src); + assert!(result.is_err(), "should have type errors"); + let errors = result.unwrap_err(); + assert!( + errors.len() >= 2, + "both type errors should be reported, got {} error(s)", + errors.len() + ); +} diff --git a/aelys/tests/sema_scope_tests.rs b/aelys/tests/sema_scope_tests.rs new file mode 100644 index 0000000..0d3dda6 --- /dev/null +++ b/aelys/tests/sema_scope_tests.rs @@ -0,0 +1,239 @@ +/// Tests for scoped signature collection 
and if-branch scope isolation. +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::TypeInference; +use aelys_syntax::Source; + +fn sema_ok(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_ok() +} + +#[allow(dead_code)] +fn sema_err(code: &str) -> bool { + !sema_ok(code) +} + +#[test] +fn toplevel_fn_not_overwritten_by_same_name_in_if() { + assert!( + sema_ok( + r#" +fn compute(x: i64) -> i64 { return x + 1 } +if true { + fn compute(x: i64) -> i64 { return x + 2 } +} +let result: i64 = compute(10) +"# + ), + "top-level compute() should be preserved after if block with same-name fn" + ); +} + +#[test] +fn toplevel_fn_not_overwritten_by_same_name_in_while() { + assert!( + sema_ok( + r#" +fn compute(x: i64) -> i64 { return x + 1 } +while false { + fn compute(x: i64) -> i64 { return x + 2 } +} +let result: i64 = compute(10) +"# + ), + "top-level compute() should be preserved after while block with same-name fn" + ); +} + +#[test] +fn toplevel_fn_not_overwritten_by_same_name_in_for() { + assert!( + sema_ok( + r#" +fn compute(x: i64) -> i64 { return x + 1 } +for i in 0..0 { + fn compute(x: i64) -> i64 { return x + 2 } +} +let result: i64 = compute(10) +"# + ), + "top-level compute() should be preserved after for block with same-name fn" + ); +} + +#[test] +fn fn_in_unreachable_if_does_not_leak_to_outer_scope() { + assert!( + sema_ok( + r#" +fn greet() -> i64 { return 42 } +if false { + fn unreachable_fn() -> string { return "never runs" } +} +let x: i64 = greet() +"# + ), + "unreachable fn inside if-false should not affect type checking" + ); +} + +#[test] +fn if_then_var_not_visible_after_if() { + assert!( + sema_ok( + r#" +if true { + let x: i64 = 42 +} +let x: string = "hello" +"# + ), + "variable x defined in then-branch 
should not conflict with x after the if" + ); +} + +#[test] +fn if_else_var_not_visible_after_if() { + assert!( + sema_ok( + r#" +if true { + let a: i64 = 1 +} else { + let b: string = "hi" +} +let b: i64 = 99 +"# + ), + "variable b defined in else-branch should not conflict with b after the if" + ); +} + +#[test] +fn if_then_var_not_visible_in_else() { + assert!( + sema_ok( + r#" +if true { + let val: i64 = 10 +} else { + let val: string = "ten" +} +"# + ), + "then-branch val:i64 should not conflict with else-branch val:string" + ); +} + +#[test] +fn if_branch_scope_with_implicit_return() { + assert!( + sema_ok( + r#" +fn pick(flag: bool) -> i64 { + if flag { + let temp: string = "computing" + 42 + } else { + let temp: i64 = 0 + temp + } +} +let result: i64 = pick(true) +"# + ), + "implicit return through if/else should have scoped branches" + ); +} + +#[test] +fn literal_tracking_does_not_leak_between_functions() { + assert!( + sema_ok( + r#" +fn f() { + let x = 200 +} + +fn g(x: i8) -> i8 { + return x +} +"# + ), + "literal tracking from one function must not pollute another function" + ); +} + +#[test] +fn literal_tracking_respects_inner_scope_shadowing() { + assert!( + sema_ok( + r#" +fn g(x: i8) -> i8 { + { + let x = 200 + } + return x +} +"# + ), + "inner scoped literal shadow must not affect outer return narrowing" + ); +} + +#[test] +fn mutable_shadow_must_not_make_outer_binding_assignable() { + assert!( + sema_err( + r#" +fn bug() { + let x = 1 + { + let mut x = 2 + } + x = 3 +} +"# + ), + "inner let mut x must not allow assigning to outer immutable x" + ); +} + +#[test] +fn function_decl_inside_block_must_not_leak_outside_block_scope() { + assert!( + sema_err( + r#" +{ + fn hidden() -> i64 { return 7 } +} +fn use_it() -> i64 { + return hidden() +} +"# + ), + "function declared in a block must not be callable outside that block" + ); +} + +#[test] +fn block_local_function_can_shadow_outer_same_name() { + assert!( + sema_ok( + r#" +fn f() -> i64 { return 1 
} +{ + fn f() -> i64 { return 2 } + let y: i64 = f() +} +let x: i64 = f() +"# + ), + "block-local function should be allowed to shadow outer function name" + ); +} diff --git a/aelys/tests/sema_type_propagation_tests.rs b/aelys/tests/sema_type_propagation_tests.rs new file mode 100644 index 0000000..f0d1764 --- /dev/null +++ b/aelys/tests/sema_type_propagation_tests.rs @@ -0,0 +1,533 @@ +use aelys_frontend::lexer::Lexer; +use aelys_frontend::parser::Parser; +use aelys_sema::{ResolvedType, TypeInference, TypedExprKind, TypedStmtKind}; +use aelys_syntax::Source; + +fn should_fail(code: &str) -> bool { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).is_err() +} + +fn infer_ok(code: &str) -> aelys_sema::TypedProgram { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + TypeInference::infer_program(stmts, src).expect("inference failed") +} + +fn should_pass(code: &str) { + let src = Source::new("", code); + let tokens = Lexer::with_source(src.clone()).scan().expect("lex failed"); + let stmts = Parser::new(tokens, src.clone()) + .parse() + .expect("parse failed"); + match TypeInference::infer_program(stmts, src) { + Ok(_) => {} + Err(errors) => { + for e in &errors { + eprintln!(" ERROR: {}", e); + } + panic!("expected OK, got {} errors", errors.len()); + } + } +} + +#[test] +fn generic_id_wrong_return_type() { + // id returns T, but caller expects string while passing i64 + assert!(should_fail( + r#" +fn id(x: T) -> T { return x } +fn test() -> string { + return id(42) +} +"# + )); +} + +#[test] +fn generic_id_correct_return_type() { + should_pass( + r#" +fn id(x: T) -> T { return x } +fn test() -> i64 { + return id(42) +} +"#, + ); +} + 
+#[test] +fn generic_two_params_swap_return_wrong() { + // swap returns B. Passing (string, i64) returns i64, but we expect string + assert!(should_fail( + r#" +fn swap(a: A, b: B) -> B { return b } +fn test() -> string { + return swap("hello", 42) +} +"# + )); +} + +#[test] +fn generic_two_params_swap_return_correct() { + should_pass( + r#" +fn swap(a: A, b: B) -> B { return b } +fn test() -> i64 { + return swap("hello", 42) +} +"#, + ); +} + +#[test] +fn generic_called_twice_with_different_types() { + // Each call site should get its own instantiation + should_pass( + r#" +fn id(x: T) -> T { return x } +fn test() { + let a: i64 = id(42) + let b: string = id("hello") +} +"#, + ); +} + +#[test] +fn generic_called_twice_one_wrong() { + // Second call returns i64 but expects string + assert!(should_fail( + r#" +fn id(x: T) -> T { return x } +fn test() { + let a: i64 = id(42) + let b: string = id(99) +} +"# + )); +} + +#[test] +fn generic_same_type_param_used_twice_in_params() { + // first(a: T, b: T) -> T: both args must have the same type + assert!(should_fail( + r#" +fn first(a: T, b: T) -> T { return a } +fn test() -> i64 { + return first(42, "hello") +} +"# + )); +} + +#[test] +fn generic_same_type_param_both_correct() { + should_pass( + r#" +fn first(a: T, b: T) -> T { return a } +fn test() -> i64 { + return first(42, 99) +} +"#, + ); +} + +#[test] +fn if_else_branches_different_types_rejected() { + assert!(should_fail( + r#" +fn test(x: bool) -> i64 { + if x { + 42 + } else { + "hello" + } +} +"# + )); +} + +#[test] +fn nested_if_else_type_mismatch() { + assert!(should_fail( + r#" +fn test(a: bool, b: bool) -> i64 { + if a { + if b { + 42 + } else { + "wrong" + } + } else { + 0 + } +} +"# + )); +} + +#[test] +fn let_annotation_mismatch_with_string() { + assert!(should_fail( + r#" +fn test() { + let x: i64 = "hello" +} +"# + )); +} + +#[test] +fn let_annotation_mismatch_with_bool() { + assert!(should_fail( + r#" +fn test() { + let x: i64 = true +} +"# + )); +} + 
+#[test] +fn let_annotation_correct() { + should_pass( + r#" +fn test() { + let x: i64 = 42 +} +"#, + ); +} + +#[test] +fn assign_wrong_type_to_variable() { + assert!(should_fail( + r#" +fn test() { + let mut x: i64 = 42 + x = "hello" +} +"# + )); +} + +#[test] +fn assign_bool_to_i64_variable() { + assert!(should_fail( + r#" +fn test() { + let mut x: i64 = 0 + x = true +} +"# + )); +} + +#[test] +fn wrong_number_of_args_extra() { + assert!(should_fail( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } +fn test() -> i64 { + return add(1, 2, 3) +} +"# + )); +} + +#[test] +fn wrong_number_of_args_fewer() { + assert!(should_fail( + r#" +fn add(a: i64, b: i64) -> i64 { return a + b } +fn test() -> i64 { + return add(1) +} +"# + )); +} + +#[test] +fn constraint_chain_through_variables() { + // x = 42 (i64), y = x (i64), then y used as string → error + assert!(should_fail( + r#" +fn consume_string(s: string) {} +fn test() { + let x = 42 + let y = x + consume_string(y) +} +"# + )); +} + +#[test] +fn constraint_chain_correct() { + should_pass( + r#" +fn double(n: i64) -> i64 { return n + n } +fn test() { + let x = 42 + let y = double(x) + let z = double(y) +} +"#, + ); +} + +#[test] +fn block_implicit_return_type_mismatch() { + assert!(should_fail( + r#" +fn test() -> i64 { + { + "hello" + } +} +"# + )); +} + +#[test] +fn block_implicit_return_correct() { + should_pass( + r#" +fn test() -> i64 { + { + 42 + } +} +"#, + ); +} + +#[test] +fn comparison_between_incompatible_types() { + assert!(should_fail( + r#" +fn test() -> bool { + return 42 > "hello" +} +"# + )); +} + +#[test] +fn comparison_bool_with_integer() { + assert!(should_fail( + r#" +fn test() -> bool { + return true > 5 +} +"# + )); +} + +#[test] +fn addition_string_and_bool() { + assert!(should_fail( + r#" +fn test() { + let x = "hello" + true +} +"# + )); +} + +#[test] +fn subtraction_on_booleans() { + assert!(should_fail( + r#" +fn test() { + let x = true - false +} +"# + )); +} + +#[test] +fn 
struct_field_wrong_type_in_init() { + assert!(should_fail( + r#" +struct Point { x: i64, y: i64 } +fn test() { + let p = Point { x: "hello", y: 0 } +} +"# + )); +} + +#[test] +fn struct_field_access_used_as_wrong_type() { + assert!(should_fail( + r#" +struct Point { x: i64, y: i64 } +fn consume_string(s: string) {} +fn test() { + let p = Point { x: 1, y: 2 } + consume_string(p.x) +} +"# + )); +} + +#[test] +fn array_used_where_i64_expected() { + assert!(should_fail( + r#" +fn double(n: i64) -> i64 { return n + n } +fn test() { + let arr = [1, 2, 3] + double(arr) +} +"# + )); +} + +#[test] +fn array_element_used_correctly() { + should_pass( + r#" +fn double(n: i64) -> i64 { return n + n } +fn test() { + let arr = [1, 2, 3] + double(arr[0]) +} +"#, + ); +} + +#[test] +fn multiple_independent_errors() { + // both lines should produce errors, second shouldn't be swallowed + assert!(should_fail( + r#" +fn test() { + let x: i64 = "hello" + let y: string = 42 +} +"# + )); +} + +#[test] +fn function_result_plus_number_rejected() { + // can't add a function call result that is string to a number + assert!(should_fail( + r#" +fn greet() -> string { return "hi" } +fn test() -> i64 { + return greet() + 10 +} +"# + )); +} + +#[test] +fn function_result_used_correctly() { + should_pass( + r#" +fn double(n: i64) -> i64 { return n + n } +fn test() -> i64 { + return double(5) + 10 +} +"#, + ); +} + +#[test] +fn void_function_result_used_as_value() { + assert!(should_fail( + r#" +fn do_nothing() {} +fn test() -> i64 { + return do_nothing() +} +"# + )); +} + +#[test] +fn void_function_called_correctly() { + should_pass( + r#" +fn do_nothing() {} +fn test() { + do_nothing() +} +"#, + ); +} + +#[test] +fn recursive_function_returns_wrong_type_in_base() { + assert!(should_fail( + r#" +fn countdown(n: i64) -> i64 { + if n == 0 { + return "done" + } + return countdown(n - 1) +} +"# + )); +} + +#[test] +fn recursive_function_correct() { + should_pass( + r#" +fn factorial(n: i64) -> i64 { + 
if n <= 1 { + return 1 + } + return n * factorial(n - 1) +} +"#, + ); +} + +/// VecLiteral.element_type must be substituted +/// element_type was cloned verbatim through the substitution pass, so a type annotation that resolved through inference could remain stale. +#[test] +fn vec_literal_element_type_is_substituted() { + let program = infer_ok( + r#" +fn test() { + let v = vec[1, 2, 3] +} +"#, + ); + // walk the AST to find the VecLiteral and check its element_type + for stmt in &program.stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + for body_stmt in &func.body { + if let TypedStmtKind::Let { initializer, .. } = &body_stmt.kind { + if let TypedExprKind::VecLiteral { element_type, .. } = &initializer.kind { + let et = element_type + .as_ref() + .expect("element_type should be Some for annotated vec"); + assert_eq!( + *et, + ResolvedType::I32, + "VecLiteral.element_type should be I32 after substitution, got {:?}", + et + ); + return; + } + } + } + } + } + panic!("did not find VecLiteral in typed AST"); +} diff --git a/aelys/tests/sized_types_and_structs_tests.rs b/aelys/tests/sized_types_and_structs_tests.rs deleted file mode 100644 index a9ba8d6..0000000 --- a/aelys/tests/sized_types_and_structs_tests.rs +++ /dev/null @@ -1,552 +0,0 @@ -use aelys::run; -use aelys_runtime::Value; -use aelys_sema::types::{InferType, ResolvedType, TypeVarId}; -use aelys_sema::{StructDef, StructField, Substitution, TypeTable}; - -fn run_ok(source: &str) -> Value { - run(source, "test.aelys").expect("program should succeed") -} - -#[allow(dead_code)] -fn run_err(source: &str) -> String { - run(source, "test.aelys") - .expect_err("program should fail") - .to_string() -} - -fn parse(source: &str) -> Vec { - let src = aelys_syntax::Source::new("", source); - let tokens = aelys_frontend::lexer::Lexer::with_source(src.clone()) - .scan() - .unwrap(); - aelys_frontend::parser::Parser::new(tokens, src) - .parse() - .unwrap() -} - -fn infer(source: &str) -> 
aelys_sema::InferenceResult { - let src = aelys_syntax::Source::new("", source); - let tokens = aelys_frontend::lexer::Lexer::with_source(src.clone()) - .scan() - .unwrap(); - let ast = aelys_frontend::parser::Parser::new(tokens, src.clone()) - .parse() - .unwrap(); - aelys_sema::TypeInference::infer_program_full(ast, src, Default::default(), Default::default()) - .unwrap() -} - -fn make_ann(name: &str) -> aelys_syntax::TypeAnnotation { - aelys_syntax::TypeAnnotation::new(name.to_string(), aelys_syntax::Span::new(0, 0, 1, 1)) -} - -// --------------------------------------------------------------------------- -// InferType sized variants -// --------------------------------------------------------------------------- - -#[test] -fn infer_type_from_annotation_sized_integers() { - assert_eq!(InferType::from_annotation(&make_ann("int")), InferType::I64); - assert_eq!(InferType::from_annotation(&make_ann("i64")), InferType::I64); - assert_eq!(InferType::from_annotation(&make_ann("i32")), InferType::I32); - assert_eq!(InferType::from_annotation(&make_ann("i16")), InferType::I16); - assert_eq!(InferType::from_annotation(&make_ann("i8")), InferType::I8); - assert_eq!(InferType::from_annotation(&make_ann("u64")), InferType::U64); - assert_eq!(InferType::from_annotation(&make_ann("u32")), InferType::U32); - assert_eq!(InferType::from_annotation(&make_ann("u16")), InferType::U16); - assert_eq!(InferType::from_annotation(&make_ann("u8")), InferType::U8); -} - -#[test] -fn infer_type_from_annotation_sized_floats() { - assert_eq!( - InferType::from_annotation(&make_ann("float")), - InferType::F64 - ); - assert_eq!(InferType::from_annotation(&make_ann("f64")), InferType::F64); - assert_eq!(InferType::from_annotation(&make_ann("f32")), InferType::F32); -} - -#[test] -fn infer_type_from_annotation_struct() { - assert_eq!( - InferType::from_annotation(&make_ann("Point")), - InferType::Struct("Point".to_string()) - ); - assert_eq!( - InferType::from_annotation(&make_ann("MyStruct")), 
- InferType::Struct("MyStruct".to_string()) - ); -} - -#[test] -fn infer_type_from_annotation_case_insensitive_builtins() { - assert_eq!(InferType::from_annotation(&make_ann("Int")), InferType::I64); - assert_eq!( - InferType::from_annotation(&make_ann("Float")), - InferType::F64 - ); - assert_eq!(InferType::from_annotation(&make_ann("I32")), InferType::I32); -} - -#[test] -fn infer_type_is_integer() { - assert!(InferType::I8.is_integer()); - assert!(InferType::I16.is_integer()); - assert!(InferType::I32.is_integer()); - assert!(InferType::I64.is_integer()); - assert!(InferType::U8.is_integer()); - assert!(InferType::U16.is_integer()); - assert!(InferType::U32.is_integer()); - assert!(InferType::U64.is_integer()); - assert!(!InferType::F32.is_integer()); - assert!(!InferType::F64.is_integer()); - assert!(!InferType::Bool.is_integer()); - assert!(!InferType::String.is_integer()); -} - -#[test] -fn infer_type_is_float() { - assert!(InferType::F32.is_float()); - assert!(InferType::F64.is_float()); - assert!(!InferType::I64.is_float()); - assert!(!InferType::Bool.is_float()); -} - -#[test] -fn infer_type_is_numeric() { - assert!(InferType::I64.is_numeric()); - assert!(InferType::F32.is_numeric()); - assert!(!InferType::Bool.is_numeric()); - assert!(!InferType::Struct("X".to_string()).is_numeric()); -} - -#[test] -fn infer_type_sized_variants_are_concrete() { - assert!(InferType::I8.is_concrete()); - assert!(InferType::U64.is_concrete()); - assert!(InferType::F32.is_concrete()); - assert!(InferType::Struct("Foo".to_string()).is_concrete()); - assert!(!InferType::Var(TypeVarId(0)).is_concrete()); - assert!(!InferType::Dynamic.is_concrete()); -} - -#[test] -fn infer_type_sized_variants_no_vars() { - assert!(!InferType::I32.has_vars()); - assert!(!InferType::U8.has_vars()); - assert!(!InferType::F64.has_vars()); - assert!(!InferType::Struct("Vec3".to_string()).has_vars()); -} - -#[test] -fn infer_type_display_sized() { - assert_eq!(format!("{}", InferType::I8), "i8"); - 
assert_eq!(format!("{}", InferType::I64), "i64"); - assert_eq!(format!("{}", InferType::U32), "u32"); - assert_eq!(format!("{}", InferType::F32), "f32"); - assert_eq!(format!("{}", InferType::F64), "f64"); - assert_eq!( - format!("{}", InferType::Struct("Point".to_string())), - "Point" - ); -} - -// --------------------------------------------------------------------------- -// ResolvedType sized variants -// --------------------------------------------------------------------------- - -#[test] -fn resolved_type_is_integer() { - assert!(ResolvedType::I64.is_integer()); - assert!(ResolvedType::U8.is_integer()); - assert!(!ResolvedType::F64.is_integer()); -} - -#[test] -fn resolved_type_is_float() { - assert!(ResolvedType::F64.is_float()); - assert!(ResolvedType::F32.is_float()); - assert!(!ResolvedType::I64.is_float()); -} - -#[test] -fn resolved_type_is_integer_ish() { - assert!(ResolvedType::I64.is_integer_ish()); - assert!(ResolvedType::U32.is_integer_ish()); - assert!(ResolvedType::Uncertain(Box::new(ResolvedType::I64)).is_integer_ish()); - assert!(!ResolvedType::F64.is_integer_ish()); - assert!(!ResolvedType::Dynamic.is_integer_ish()); -} - -#[test] -fn resolved_type_from_infer_type_sized() { - assert_eq!( - ResolvedType::from_infer_type(&InferType::I8), - ResolvedType::I8 - ); - assert_eq!( - ResolvedType::from_infer_type(&InferType::U64), - ResolvedType::U64 - ); - assert_eq!( - ResolvedType::from_infer_type(&InferType::F32), - ResolvedType::F32 - ); - assert_eq!( - ResolvedType::from_infer_type(&InferType::Struct("P".to_string())), - ResolvedType::Struct("P".to_string()) - ); -} - -// --------------------------------------------------------------------------- -// TypeTable -// --------------------------------------------------------------------------- - -#[test] -fn type_table_register_and_get() { - let mut table = TypeTable::new(); - table.register_struct(StructDef { - name: "Point".to_string(), - type_params: Vec::new(), - fields: vec![ - StructField { - 
name: "x".to_string(), - ty: InferType::F64, - }, - StructField { - name: "y".to_string(), - ty: InferType::F64, - }, - ], - }); - - assert!(table.has_struct("Point")); - assert!(!table.has_struct("Line")); - - let def = table.get_struct("Point").unwrap(); - assert_eq!(def.fields.len(), 2); - assert_eq!(def.fields[0].name, "x"); - assert_eq!(def.fields[1].ty, InferType::F64); -} - -// --------------------------------------------------------------------------- -// Parser: struct declarations -// --------------------------------------------------------------------------- - -#[test] -fn parse_struct_declaration() { - let stmts = parse("struct Point { x: f64, y: f64 }"); - assert_eq!(stmts.len(), 1); - - match &stmts[0].kind { - aelys_syntax::StmtKind::StructDecl { - name, - type_params, - fields, - is_pub, - } => { - assert_eq!(name, "Point"); - assert!(type_params.is_empty()); - assert!(!is_pub); - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].name, "x"); - assert_eq!(fields[1].name, "y"); - } - _ => panic!("expected StructDecl"), - } -} - -#[test] -fn parse_pub_struct_declaration() { - let stmts = parse("pub struct Color { r: u8, g: u8, b: u8 }"); - assert_eq!(stmts.len(), 1); - - match &stmts[0].kind { - aelys_syntax::StmtKind::StructDecl { - name, - is_pub, - fields, - .. - } => { - assert_eq!(name, "Color"); - assert!(is_pub); - assert_eq!(fields.len(), 3); - } - _ => panic!("expected StructDecl"), - } -} - -#[test] -fn parse_struct_trailing_comma() { - let stmts = parse("struct Pair { a: int, b: int, }"); - match &stmts[0].kind { - aelys_syntax::StmtKind::StructDecl { fields, .. 
} => { - assert_eq!(fields.len(), 2); - } - _ => panic!("expected StructDecl"), - } -} - -// --------------------------------------------------------------------------- -// Parser: struct literals -// --------------------------------------------------------------------------- - -#[test] -fn parse_struct_literal() { - let stmts = parse("Point { x: 1, y: 2 }"); - assert_eq!(stmts.len(), 1); - - match &stmts[0].kind { - aelys_syntax::StmtKind::Expression(expr) => match &expr.kind { - aelys_syntax::ExprKind::StructLiteral { name, fields } => { - assert_eq!(name, "Point"); - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].name, "x"); - assert_eq!(fields[1].name, "y"); - } - _ => panic!("expected StructLiteral, got {:?}", expr.kind), - }, - _ => panic!("expected expression statement"), - } -} - -#[test] -fn parse_uppercase_identifier_without_brace_is_not_struct_literal() { - let stmts = parse("let DEBUG = true"); - match &stmts[0].kind { - aelys_syntax::StmtKind::Let { initializer, .. } => { - assert!(matches!( - initializer.kind, - aelys_syntax::ExprKind::Bool(true) - )); - } - _ => panic!("expected let statement"), - } -} - -#[test] -fn parse_uppercase_var_before_block_is_not_struct_literal() { - let stmts = parse("if TRUE { 1 } else { 0 }"); - assert!(matches!(stmts[0].kind, aelys_syntax::StmtKind::If { .. 
})); -} - -// --------------------------------------------------------------------------- -// Lexer: struct keyword -// --------------------------------------------------------------------------- - -#[test] -fn lexer_recognizes_struct_keyword() { - let src = aelys_syntax::Source::new("", "struct"); - let tokens = aelys_frontend::lexer::Lexer::with_source(src) - .scan() - .unwrap(); - assert!(matches!(tokens[0].kind, aelys_syntax::TokenKind::Struct)); -} - -// --------------------------------------------------------------------------- -// Sema: type inference with sized types -// --------------------------------------------------------------------------- - -#[test] -fn infer_int_literal_as_i64() { - let result = infer("let x = 42"); - let stmt = &result.program.stmts[0]; - match &stmt.kind { - aelys_sema::TypedStmtKind::Let { var_type, .. } => { - let resolved = ResolvedType::from_infer_type(var_type); - assert!(resolved.is_integer()); - } - _ => panic!("expected Let"), - } -} - -#[test] -fn infer_float_literal_as_f64() { - let result = infer("let x = 3.14"); - let stmt = &result.program.stmts[0]; - match &stmt.kind { - aelys_sema::TypedStmtKind::Let { var_type, .. } => { - let resolved = ResolvedType::from_infer_type(var_type); - assert!(resolved.is_float()); - } - _ => panic!("expected Let"), - } -} - -#[test] -fn infer_struct_declaration_populates_type_table() { - let result = infer("struct Vec2 { x: f64, y: f64 }"); - assert!(result.type_table.has_struct("Vec2")); - let def = result.type_table.get_struct("Vec2").unwrap(); - assert_eq!(def.fields.len(), 2); - assert_eq!(def.fields[0].ty, InferType::F64); -} - -#[test] -fn infer_struct_literal_type() { - let result = infer( - r#" - struct Point { x: f64, y: f64 } - let p = Point { x: 1.0, y: 2.0 } - "#, - ); - let let_stmt = &result.program.stmts[1]; - match &let_stmt.kind { - aelys_sema::TypedStmtKind::Let { var_type, .. 
} => { - assert_eq!(*var_type, InferType::Struct("Point".to_string())); - } - _ => panic!("expected Let"), - } -} - -#[test] -fn infer_struct_field_access_type() { - let result = infer( - r#" - struct Pair { a: int, b: int } - let p = Pair { a: 10, b: 20 } - p.a - "#, - ); - let expr_stmt = &result.program.stmts[2]; - match &expr_stmt.kind { - aelys_sema::TypedStmtKind::Expression(expr) => { - assert!(matches!( - &expr.kind, - aelys_sema::TypedExprKind::Member { .. } - )); - } - _ => panic!("expected Expression"), - } -} - -// --------------------------------------------------------------------------- -// Sema: unification with sized types -// --------------------------------------------------------------------------- - -#[test] -fn unify_same_sized_types() { - let mut subst = Substitution::new(); - assert!(aelys_sema::unify::unify(&InferType::I32, &InferType::I32, &mut subst).is_ok()); - assert!(aelys_sema::unify::unify(&InferType::U64, &InferType::U64, &mut subst).is_ok()); - assert!(aelys_sema::unify::unify(&InferType::F32, &InferType::F32, &mut subst).is_ok()); -} - -#[test] -fn unify_different_sized_types_fails() { - let mut subst = Substitution::new(); - assert!(aelys_sema::unify::unify(&InferType::I32, &InferType::I64, &mut subst).is_err()); - assert!(aelys_sema::unify::unify(&InferType::F32, &InferType::F64, &mut subst).is_err()); - assert!(aelys_sema::unify::unify(&InferType::I64, &InferType::F64, &mut subst).is_err()); -} - -#[test] -fn unify_struct_nominal_same_name() { - let mut subst = Substitution::new(); - let a = InferType::Struct("Point".to_string()); - let b = InferType::Struct("Point".to_string()); - assert!(aelys_sema::unify::unify(&a, &b, &mut subst).is_ok()); -} - -#[test] -fn unify_struct_nominal_different_name_fails() { - let mut subst = Substitution::new(); - let a = InferType::Struct("Point".to_string()); - let b = InferType::Struct("Color".to_string()); - assert!(aelys_sema::unify::unify(&a, &b, &mut subst).is_err()); -} - -// 
--------------------------------------------------------------------------- -// E2E: sized type annotations work through the VM -// --------------------------------------------------------------------------- - -#[test] -fn e2e_i64_annotation() { - assert_eq!(run_ok("let x: i64 = 42\nx").as_int(), Some(42)); -} - -#[test] -fn e2e_int_alias_still_works() { - assert_eq!(run_ok("let x: int = 99\nx").as_int(), Some(99)); -} - -#[test] -fn e2e_f64_annotation() { - let v = run_ok("let x: f64 = 2.718\nx"); - assert!((v.as_float().unwrap() - std::f64::consts::E).abs() < 0.001); -} - -#[test] -fn e2e_float_alias_still_works() { - let v = run_ok("let x: float = 1.5\nx"); - assert!((v.as_float().unwrap() - 1.5).abs() < 0.001); -} - -#[test] -fn e2e_i64_arithmetic() { - let v = run_ok("let a: i64 = 10\nlet b: i64 = 20\na + b"); - assert_eq!(v.as_int(), Some(30)); -} - -#[test] -fn e2e_f64_arithmetic() { - let v = run_ok("let a: f64 = 1.5\nlet b: f64 = 2.5\na * b"); - assert!((v.as_float().unwrap() - 3.75).abs() < 0.001); -} - -#[test] -fn e2e_struct_declaration_is_noop_in_vm() { - let v = run_ok("struct Foo { x: int }\n42"); - assert_eq!(v.as_int(), Some(42)); -} - -#[test] -fn e2e_typed_for_loop() { - let v = run_ok( - r#" - let mut sum: i64 = 0 - for i in 1..=10 { - sum += i - } - sum - "#, - ); - assert_eq!(v.as_int(), Some(55)); -} - -#[test] -fn e2e_typed_function_params() { - let v = run_ok( - r#" - fn add(a: i64, b: i64) -> i64 { - return a + b - } - add(100, 200) - "#, - ); - assert_eq!(v.as_int(), Some(300)); -} - -#[test] -fn e2e_typed_lambda() { - let v = run_ok( - r#" - let mul = fn(a: i64, b: i64) -> i64 { return a * b } - mul(7, 8) - "#, - ); - assert_eq!(v.as_int(), Some(56)); -} - -#[test] -fn e2e_uppercase_variable_not_confused_with_struct() { - let v = run_ok( - r#" - let MAX = 100 - let result = if MAX > 50 { 1 } else { 0 } - result - "#, - ); - assert_eq!(v.as_int(), Some(1)); -} diff --git a/aelys/tests/stdlib_fs_tests.rs b/aelys/tests/stdlib_fs_tests.rs 
deleted file mode 100644 index ab848e5..0000000 --- a/aelys/tests/stdlib_fs_tests.rs +++ /dev/null @@ -1,344 +0,0 @@ -mod common; -use common::*; -use std::fs; -use tempfile::tempdir; - -#[test] -fn fs_write_and_read_text() { - let dir = tempdir().unwrap(); - let path = dir.path().join("test.txt"); - let path_str = path.display().to_string().replace('\\', "/"); - - let code = format!( - r#" -needs std.fs -fs.write_text("{}", "hello world") -fs.read_text("{}") - "#, - path_str, path_str - ); - - let err = run_aelys_err(&code); - // Without capability, should fail - assert!(err.contains("capability") || err.contains("permission")); -} - -#[test] -fn fs_open_read_close() { - let dir = tempdir().unwrap(); - let test_file = dir.path().join("data.txt"); - fs::write(&test_file, "test content").unwrap(); - let path_str = test_file.display().to_string().replace('\\', "/"); - - let code = format!( - r#" -needs std.fs -let f = fs.open("{}", "r") -let data = fs.read(f) -fs.close(f) -42 -"#, - path_str - ); - - // Will fail without capability - let err = run_aelys_err(&code); - assert!(err.contains("capability") || err.contains("permission") || err.contains("denied")); -} - -#[test] -fn fs_open_invalid_mode() { - let dir = tempdir().unwrap(); - let path = dir.path().join("test.txt"); - let path_str = path.display().to_string().replace('\\', "/"); - - let code = format!( - r#" -needs std.fs -fs.open("{}", "xyz") -"#, - path_str - ); - - let err = run_aelys_err(&code); - assert!(err.contains("invalid") || err.contains("mode") || err.contains("capability")); -} - -#[test] -fn fs_close_invalid_handle() { - let code = r#" -needs std.fs -fs.close(999) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("handle") || err.contains("capability")); -} - -#[test] -fn fs_read_line_eof_returns_null() { - // Will fail without capability, but tests the expected behavior - let dir = tempdir().unwrap(); - let test_file = dir.path().join("lines.txt"); - 
fs::write(&test_file, "line1\nline2\n").unwrap(); - let path_str = test_file.display().to_string().replace('\\', "/"); - - let code = format!( - r#" -needs std.fs -let f = fs.open("{}", "r") -let l1 = fs.read_line(f) -let l2 = fs.read_line(f) -let eof = fs.read_line(f) -fs.close(f) -42 -"#, - path_str - ); - - let err = run_aelys_err(&code); - assert!(err.contains("capability") || err.contains("denied")); -} - -#[test] -fn fs_read_bytes_negative() { - let code = r#" -needs std.fs -let f = 1 -fs.read_bytes(f, -10) -"#; - let err = run_aelys_err(code); - assert!(err.contains("negative") || err.contains("capability")); -} - -#[test] -fn fs_read_bytes_exceeds_max() { - let code = r#" -needs std.fs -let f = 1 -fs.read_bytes(f, 20000000) -"#; - let err = run_aelys_err(code); - assert!(err.contains("max") || err.contains("MAX") || err.contains("capability")); -} - -#[test] -fn fs_write_not_opened_for_writing() { - let dir = tempdir().unwrap(); - let test_file = dir.path().join("readonly.txt"); - fs::write(&test_file, "data").unwrap(); - let path_str = test_file.display().to_string().replace('\\', "/"); - - let code = format!( - r#" -needs std.fs -let f = fs.open("{}", "r") -fs.write(f, "new data") -"#, - path_str - ); - - let err = run_aelys_err(&code); - assert!(err.contains("writing") || err.contains("capability")); -} - -#[test] -fn fs_exists_nonexistent() { - let code = r#" -needs std.fs -fs.exists("/nonexistent/path/nowhere.txt") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability") || err.contains("denied")); -} - -#[test] -fn fs_basename_and_dirname() { - let code = r#" -needs std.fs -let base = fs.basename("/foo/bar/test.txt") -let dir = fs.dirname("/foo/bar/test.txt") -42 -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability")); -} - -#[test] -fn fs_extension_extraction() { - let code = r#" -needs std.fs -fs.extension("file.rs") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability")); -} - -#[test] -fn 
fs_mkdir_fails_without_capability() { - let code = r#" -needs std.fs -fs.mkdir("/tmp/test_aelys") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_delete_fails_without_capability() { - let code = r#" -needs std.fs -fs.delete("/tmp/somefile.txt") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_rename_fails_without_capability() { - let code = r#" -needs std.fs -fs.rename("/tmp/old.txt", "/tmp/new.txt") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_copy_fails_without_capability() { - let code = r#" -needs std.fs -fs.copy("/tmp/src.txt", "/tmp/dst.txt") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_readdir_fails_without_capability() { - let code = r#" -needs std.fs -fs.readdir("/tmp") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_join_absolute_path_rejected() { - // This is already tested in security_audit_tests.rs - // but worth repeating - let code = r#" -needs std.fs -fs.join("/app", "/etc/passwd") -"#; - let err = run_aelys_err(code); - assert!(err.contains("absolute") || err.contains("capability")); -} - -#[test] -fn fs_join_parent_escape() { - let code = r#" -needs std.fs -fs.join("/app/data", "../../etc/passwd") -"#; - let err = run_aelys_err(code); - assert!(err.contains("escapes") || err.contains("capability")); -} - -#[test] -fn fs_absolute_nonexistent() { - let code = r#" -needs std.fs -fs.absolute("/nonexistent/path") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability") || err.contains("failed")); -} - -#[test] -fn fs_write_line_works() { - let code = r#" -needs std.fs -let f = 1 -fs.write_line(f, "test") -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn fs_is_file_and_is_dir() { - let code = r#" -needs std.fs -fs.is_file("/tmp") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn 
fs_size_of_nonexistent() { - let code = r#" -needs std.fs -fs.size("/nonexistent") -"#; - let err = run_aelys_err(code); - assert!(err.contains("capability") || err.contains("failed")); -} - -#[test] -fn fs_rmdir_fails_without_capability() { - let code = r#" -needs std.fs -fs.rmdir("/tmp/testdir") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_mkdir_all_fails_without_capability() { - let code = r#" -needs std.fs -fs.mkdir_all("/tmp/a/b/c") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_append_text_fails_without_capability() { - let code = r#" -needs std.fs -fs.append_text("/tmp/log.txt", "new line") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn fs_double_close() { - let code = r#" -needs std.fs -let f = 1 -fs.close(f) -fs.close(f) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn fs_read_after_close() { - let code = r#" -needs std.fs -let f = 1 -fs.close(f) -fs.read(f) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn fs_write_after_close() { - let code = r#" -needs std.fs -let f = 1 -fs.close(f) -fs.write(f, "data") -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} diff --git a/aelys/tests/stdlib_math_tests.rs b/aelys/tests/stdlib_math_tests.rs deleted file mode 100644 index e036e2f..0000000 --- a/aelys/tests/stdlib_math_tests.rs +++ /dev/null @@ -1,514 +0,0 @@ -mod common; -use common::*; - -#[test] -fn math_constants_defined() { - let code = r#" -if PI > 3.14 and PI < 3.15 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn math_e_constant() { - let code = r#" -if E > 2.71 and E < 2.72 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn math_tau_is_two_pi() { - let code = r#" -if TAU > 6.28 and TAU < 6.29 { 1 } else { 0 } -"#; - 
assert_aelys_int(code, 1); -} - -#[test] -fn sqrt_basic() { - let code = r#" -let r = sqrt(16.0) -if r > 3.9 and r < 4.1 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sqrt_negative_is_nan() { - let code = r#" -let r = sqrt(-1.0) -if is_nan(r) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn cbrt_basic() { - let code = r#" -let r = cbrt(27.0) -if r > 2.9 and r < 3.1 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn abs_int_preserves_type() { - let code = r#" -abs(-42) -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn abs_float() { - let code = r#" -let r = abs(-3.14) -if r > 3.13 and r < 3.15 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sign_positive() { - let code = r#" -sign(42) -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sign_negative() { - let code = r#" -sign(-10) -"#; - assert_aelys_int(code, -1); -} - -#[test] -fn sign_zero() { - let code = r#" -sign(0) -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn sin_zero() { - let code = r#" -let r = sin(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn cos_zero() { - let code = r#" -let r = cos(0.0) -if r > 0.99 and r < 1.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn tan_zero() { - let code = r#" -let r = tan(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn asin_zero() { - let code = r#" -let r = asin(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn acos_one() { - let code = r#" -let r = acos(1.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn atan_zero() { - let code = r#" -let r = atan(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn atan2_basic() { - let code = r#" -let r = atan2(0.0, 1.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - 
assert_aelys_int(code, 1); -} - -#[test] -fn sinh_zero() { - let code = r#" -let r = sinh(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn cosh_zero() { - let code = r#" -let r = cosh(0.0) -if r > 0.99 and r < 1.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn tanh_zero() { - let code = r#" -let r = tanh(0.0) -if r > -0.01 and r < 0.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn exp_zero() { - let code = r#" -let r = exp(0.0) -if r > 0.99 and r < 1.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn log_e() { - let code = r#" -let r = log(E) -if r > 0.99 and r < 1.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn log10_hundred() { - let code = r#" -let r = log10(100.0) -if r > 1.99 and r < 2.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn log2_eight() { - let code = r#" -let r = log2(8.0) -if r > 2.99 and r < 3.01 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn pow_int_small_exp() { - let code = r#" -pow(2, 10) -"#; - assert_aelys_int(code, 1024); -} - -#[test] -fn pow_float() { - let code = r#" -let r = pow(2.0, 3.0) -if r > 7.9 and r < 8.1 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn floor_positive() { - let code = r#" -floor(3.7) -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn floor_negative() { - let code = r#" -floor(-2.3) -"#; - assert_aelys_int(code, -3); -} - -#[test] -fn ceil_positive() { - let code = r#" -ceil(3.2) -"#; - assert_aelys_int(code, 4); -} - -#[test] -fn ceil_negative() { - let code = r#" -ceil(-2.7) -"#; - assert_aelys_int(code, -2); -} - -#[test] -fn round_half_up() { - let code = r#" -round(3.5) -"#; - assert_aelys_int(code, 4); -} - -#[test] -fn round_half_down() { - let code = r#" -round(3.4) -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn trunc_positive() { - let code = r#" -trunc(3.9) -"#; - assert_aelys_int(code, 3); -} 
- -#[test] -fn trunc_negative() { - let code = r#" -trunc(-3.9) -"#; - assert_aelys_int(code, -3); -} - -#[test] -fn min_ints() { - let code = r#" -min(5, 3) -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn min_floats() { - let code = r#" -let r = min(5.5, 3.3) -if r > 3.2 and r < 3.4 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn max_ints() { - let code = r#" -max(5, 3) -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn max_floats() { - let code = r#" -let r = max(5.5, 3.3) -if r > 5.4 and r < 5.6 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn clamp_below_range() { - let code = r#" -clamp(1, 5, 10) -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn clamp_above_range() { - let code = r#" -clamp(15, 5, 10) -"#; - assert_aelys_int(code, 10); -} - -#[test] -fn clamp_in_range() { - let code = r#" -clamp(7, 5, 10) -"#; - assert_aelys_int(code, 7); -} - -#[test] -fn deg_to_rad() { - let code = r#" -let r = deg_to_rad(180.0) -if r > 3.14 and r < 3.15 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn rad_to_deg() { - let code = r#" -let r = rad_to_deg(PI) -if r > 179.9 and r < 180.1 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn hypot_3_4_5() { - let code = r#" -let r = hypot(3.0, 4.0) -if r > 4.9 and r < 5.1 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn fmod_basic() { - let code = r#" -let r = fmod(7.5, 2.0) -if r > 1.4 and r < 1.6 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_nan_on_nan() { - let code = r#" -let nan = sqrt(-1.0) -if is_nan(nan) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_nan_on_int() { - let code = r#" -if is_nan(42) { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_inf_on_infinity() { - let code = r#" -if is_inf(INF) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_inf_on_normal() { - let code = r#" -if is_inf(42.0) { 0 } else { 1 } -"#; - 
assert_aelys_int(code, 1); -} - -#[test] -fn is_finite_on_normal() { - let code = r#" -if is_finite(42.0) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_finite_on_nan() { - let code = r#" -let nan = sqrt(-1.0) -if is_finite(nan) { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn is_finite_on_infinity() { - let code = r#" -if is_finite(INF) { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn neg_infinity_constant() { - let code = r#" -if is_inf(NEG_INF) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sign_nan() { - let code = r#" -let s = sign(sqrt(-1.0)) -if is_nan(s) { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn pow_overflow() { - let code = r#" -let r = pow(2, 100) -if r > 0.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn randint_in_range() { - let code = r#" -let val = randint(1, 10) -if val >= 1 and val <= 10 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn randint_single_value() { - let code = r#" -randint(42, 42) -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn randint_negative_range() { - let code = r#" -let val = randint(-10, -5) -if val >= -10 and val <= -5 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn randint_large_range() { - let code = r#" -let val = randint(0, 1000) -if val >= 0 and val <= 1000 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} diff --git a/aelys/tests/stdlib_net_tests.rs b/aelys/tests/stdlib_net_tests.rs deleted file mode 100644 index a7e90b1..0000000 --- a/aelys/tests/stdlib_net_tests.rs +++ /dev/null @@ -1,523 +0,0 @@ -mod common; -use common::*; - -#[test] -fn tcp_connect_and_close() { - let code = r#" -needs std.net -let sock = net.connect("www.google.com", 80) -net.close(sock) -42 -"#; - // Requires network capability - either succeeds or capability denied - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), 
- Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn net_invalid_port_negative() { - let code = r#" -needs std.net -net.connect("localhost", -1) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn invalid_port_too_large() { - let code = r#" -needs std.net -net.connect("localhost", 99999) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("capability")); -} - -#[test] -fn connect_timeout_unreachable() { - // 10.255.255.1 is typically unreachable - let code = r#" -needs std.net -net.connect("10.255.255.1", 9999) -"#; - let err = run_aelys_err(code); - assert!( - err.contains("failed") - || err.contains("connection") - || err.contains("timeout") - || err.contains("capability") - ); -} - -#[test] -fn invalid_socket_handle() { - let code = r#" -needs std.net -net.send(999, "test") -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn recv_on_invalid_handle() { - let code = r#" -needs std.net -net.recv(123) -"#; - let err = run_aelys_err(code); - assert!( - err.to_lowercase().contains("invalid") - || err.to_lowercase().contains("handle") - || err.contains("capability") - ); -} - -#[test] -fn recv_bytes_negative_max() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.recv_bytes(s, -5) -"#; - let err = run_aelys_err(code); - assert!(err.contains("negative") || err.contains("capability")); -} - -#[test] -fn recv_bytes_exceeds_max_buffer() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.recv_bytes(s, 17000000) -"#; - let err = run_aelys_err(code); - assert!( - err.contains("max") - || err.contains("buffer") - || err.contains("exceeds") - || err.contains("capability") - ); -} - -#[test] -fn http_get_request() { - let code = r#" -needs std.net -let sock = net.connect("www.example.com", 80) 
-net.send(sock, "GET / HTTP/1.0\r\nHost: www.example.com\r\n\r\n") -let response = net.recv(sock) -net.close(sock) -42 -"#; - // Requires network capability - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn set_timeout_invalid_handle() { - let code = r#" -needs std.net -net.set_timeout(777, 1000) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn set_timeout_negative_ms() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.set_timeout(s, -100) -"#; - let err = run_aelys_err(code); - assert!(err.contains("negative") || err.contains("capability")); -} - -#[test] -fn set_nodelay_works() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.set_nodelay(s, true) -net.close(s) -1 -"#; - // Requires network capability - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn shutdown_modes() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.shutdown(s, "both") -net.close(s) -1 -"#; - // Requires network capability - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn shutdown_invalid_mode() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.shutdown(s, "invalid") -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("mode") || err.contains("capability")); -} - -#[test] -fn local_and_peer_addr() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -let local = net.local_addr(s) -let peer = net.peer_addr(s) -net.close(s) -1 -"#; - // Requires network capability - let result = 
run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn listen_invalid_port() { - let code = r#" -needs std.net -net.listen("0.0.0.0", -5) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("capability")); -} - -#[test] -fn close_invalid_handle() { - let code = r#" -needs std.net -net.close(456) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn recv_line_basic() { - // Hard to test without a real server - // Just test invalid handle - let code = r#" -needs std.net -net.recv_line(999) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn double_close() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.close(s) -net.close(s) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn send_after_close() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.close(s) -net.send(s, "data") -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn connect_dns_failure() { - let code = r#" -needs std.net -net.connect("this.domain.does.not.exist.anywhere.invalid", 80) -"#; - let err = run_aelys_err(code); - assert!( - err.contains("resolve") - || err.contains("failed") - || err.contains("connection") - || err.contains("capability") - ); -} - -#[test] -fn local_addr_after_close() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.close(s) -net.local_addr(s) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn peer_addr_invalid() { - let code = r#" -needs std.net -net.peer_addr(12345) -"#; - let err = run_aelys_err(code); - 
assert!(err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn multiple_connections() { - let code = r#" -needs std.net -let s1 = net.connect("www.google.com", 80) -let s2 = net.connect("www.example.com", 80) -net.close(s1) -net.close(s2) -42 -"#; - // Requires network capability - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn zero_timeout_disables() { - let code = r#" -needs std.net -let s = net.connect("www.google.com", 80) -net.set_timeout(s, 0) -net.close(s) -1 -"#; - // Requires network capability - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_bind_and_close() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.close(sock) -42 -"#; - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_bind_invalid_port() { - let code = r#" -needs std.net -net.udp_bind("127.0.0.1", -1) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn udp_bind_port_too_large() { - let code = r#" -needs std.net -net.udp_bind("127.0.0.1", 99999) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("capability")); -} - -#[test] -fn udp_local_addr() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -let addr = net.local_addr(sock) -net.close(sock) -42 -"#; - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_set_timeout() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.set_timeout(sock, 
1000) -net.close(sock) -1 -"#; - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_set_broadcast() { - let code = r#" -needs std.net -let sock = net.udp_bind("0.0.0.0", 0) -net.udp_set_broadcast(sock, true) -net.close(sock) -1 -"#; - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(1)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_send_to_and_recv_from() { - let code = r#" -needs std.net -let s1 = net.udp_bind("127.0.0.1", 0) -let s2 = net.udp_bind("127.0.0.1", 0) -let addr2 = net.local_addr(s2) -net.set_timeout(s2, 2000) -net.udp_send_to(s1, "hello udp", addr2) -let data = net.udp_recv_from(s2, 1024) -net.close(s1) -net.close(s2) -data -"#; - let result = run_aelys_result(code); - match result { - Ok(_) => {} // data received successfully - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_connected_send_recv() { - let code = r#" -needs std.net -let s1 = net.udp_bind("127.0.0.1", 0) -let s2 = net.udp_bind("127.0.0.1", 0) -let addr1 = net.local_addr(s1) -let addr2 = net.local_addr(s2) -net.set_timeout(s1, 2000) -net.set_timeout(s2, 2000) -net.udp_send_to(s1, "ping", addr2) -let data = net.udp_recv_from(s2, 1024) -net.close(s1) -net.close(s2) -42 -"#; - let result = run_aelys_result(code); - match result { - Ok(v) => assert_eq!(v.as_int(), Some(42)), - Err(e) => assert!(e.contains("capability")), - } -} - -#[test] -fn udp_recv_from_negative_max() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.udp_recv_from(sock, -5) -"#; - let err = run_aelys_err(code); - assert!(err.contains("negative") || err.contains("non-negative") || err.contains("capability")); -} - -#[test] -fn udp_recv_from_exceeds_max_buffer() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.udp_recv_from(sock, 17000000) -"#; - let 
err = run_aelys_err(code); - assert!( - err.contains("max") - || err.contains("buffer") - || err.contains("exceeds") - || err.contains("capability") - ); -} - -#[test] -fn udp_recv_negative_max() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.udp_recv(sock, -1) -"#; - let err = run_aelys_err(code); - assert!(err.contains("negative") || err.contains("non-negative") || err.contains("capability")); -} - -#[test] -fn udp_connect_invalid_port() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.udp_connect(sock, "127.0.0.1", -1) -"#; - let err = run_aelys_err(code); - assert!(err.contains("port") || err.contains("invalid") || err.contains("capability")); -} - -#[test] -fn udp_close_invalid_handle() { - let code = r#" -needs std.net -let sock = net.udp_bind("127.0.0.1", 0) -net.close(sock) -net.close(sock) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("capability")); -} diff --git a/aelys/tests/stdlib_string_tests.rs b/aelys/tests/stdlib_string_tests.rs deleted file mode 100644 index b05e2df..0000000 --- a/aelys/tests/stdlib_string_tests.rs +++ /dev/null @@ -1,559 +0,0 @@ -mod common; -use common::*; - -#[test] -fn string_len_basic() { - let code = r#" -"hello".len() -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn string_len_empty() { - let code = r#" -"".len() -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn string_char_len_ascii() { - let code = r#" -"hello".char_len() -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn string_char_len_unicode() { - let code = r#" -"héllo".char_len() -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn string_char_at_valid() { - let code = r#" -let c = "hello".char_at(1) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_char_at_negative() { - let code = r#" -let c = "hello".char_at(-1) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_char_at_out_of_bounds() { - let code = r#" -let c = 
"hello".char_at(100) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_byte_at_valid() { - let code = r#" -"ABC".byte_at(0) -"#; - assert_aelys_int(code, 65); // 'A' -} - -#[test] -fn string_byte_at_out_of_bounds() { - let code = r#" -"hi".byte_at(10) -"#; - assert_aelys_int(code, -1); -} - -#[test] -fn string_byte_at_negative() { - let code = r#" -"test".byte_at(-1) -"#; - assert_aelys_int(code, -1); -} - -#[test] -fn string_substr_basic() { - let code = r#" -let s = "hello world".substr(0, 5) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_substr_negative_start() { - let code = r#" -let s = "hello".substr(-1, 3) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_substr_negative_len() { - let code = r#" -let s = "hello".substr(0, -5) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_to_upper() { - let code = r#" -let s = "hello".to_upper() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_to_lower() { - let code = r#" -let s = "HELLO".to_lower() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_capitalize() { - let code = r#" -let s = "hello".capitalize() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_capitalize_empty() { - let code = r#" -let s = "".capitalize() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_contains_true() { - let code = r#" -if "hello world".contains("wor") { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_contains_false() { - let code = r#" -if "hello".contains("xyz") { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_starts_with_true() { - let code = r#" -if "hello".starts_with("he") { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_starts_with_false() { - let code = r#" -if "hello".starts_with("lo") { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_ends_with_true() { - let code = r#" -if "hello".ends_with("lo") { 1 } else { 
0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_ends_with_false() { - let code = r#" -if "hello".ends_with("he") { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_find_exists() { - let code = r#" -let pos = "hello world".find("wor") -if pos >= 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_find_not_found() { - let code = r#" -"hello".find("xyz") -"#; - assert_aelys_int(code, -1); -} - -#[test] -fn string_rfind_exists() { - let code = r#" -let pos = "hello hello".rfind("hello") -if pos > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_rfind_not_found() { - let code = r#" -"hello".rfind("xyz") -"#; - assert_aelys_int(code, -1); -} - -#[test] -fn string_count_occurrences() { - let code = r#" -"hello hello hello".count("hello") -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn string_count_zero() { - let code = r#" -"hello".count("xyz") -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn string_replace_all() { - let code = r#" -let s = "hello hello".replace("hello", "hi") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_replace_first() { - let code = r#" -let s = "hello hello".replace_first("hello", "hi") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_split_basic() { - let code = r#" -let parts = "a,b,c".split(",") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_split_empty_separator() { - let code = r#" -let parts = "abc".split("") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_join_basic() { - let code = r#" -let parts = "a\nb\nc" -let s = parts.join(",") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_repeat_positive() { - let code = r#" -let s = "ab".repeat(3) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_repeat_zero() { - let code = r#" -let s = "abc".repeat(0) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_repeat_negative() { - let code = r#" -let 
s = "abc".repeat(-5) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_reverse_basic() { - let code = r#" -let s = "abc".reverse() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_reverse_unicode() { - let code = r#" -let s = "héllo".reverse() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_concat_basic() { - let code = r#" -let s = "hello".concat(" world") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_trim_whitespace() { - let code = r#" -let s = " hello ".trim() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_trim_start() { - let code = r#" -let s = " hello".trim_start() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_trim_end() { - let code = r#" -let s = "hello ".trim_end() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_pad_left_basic() { - let code = r#" -let s = "5".pad_left(3, "0") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_pad_left_already_wide() { - let code = r#" -let s = "hello".pad_left(2, "x") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_pad_right_basic() { - let code = r#" -let s = "hi".pad_right(5, ".") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_is_empty_true() { - let code = r#" -if "".is_empty() { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_empty_false() { - let code = r#" -if "x".is_empty() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_whitespace_true() { - let code = r#" -if " ".is_whitespace() { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_whitespace_false() { - let code = r#" -if " a ".is_whitespace() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_whitespace_empty() { - let code = r#" -if "".is_whitespace() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_numeric_true() { - let code = r#" -if "12345".is_numeric() { 1 
} else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_numeric_false() { - let code = r#" -if "12a34".is_numeric() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_alphabetic_true() { - let code = r#" -if "hello".is_alphabetic() { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_alphabetic_false() { - let code = r#" -if "hello123".is_alphabetic() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_alphanumeric_true() { - let code = r#" -if "hello123".is_alphanumeric() { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_is_alphanumeric_false() { - let code = r#" -if "hello-123".is_alphanumeric() { 0 } else { 1 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_lines_basic() { - let code = r#" -let s = "a\nb\nc".lines() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_line_count() { - let code = r#" -"a\nb\nc".line_count() -"#; - assert_aelys_int(code, 3); -} - -#[test] -fn string_line_count_empty() { - let code = r#" -"".line_count() -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn string_bytes_basic() { - let code = r#" -let b = "AB".bytes() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_chars_basic() { - let code = r#" -let c = "abc".chars() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_unicode_length_mismatch() { - let code = r#" -let byte_len = "😀".len() -let char_len = "😀".char_len() -if byte_len > char_len { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_emoji_reverse() { - let code = r#" -let s = "😀😁".reverse() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn string_rtl_text() { - let code = r#" -let s = "مرحبا".reverse() -42 -"#; - assert_aelys_int(code, 42); -} diff --git a/aelys/tests/stdlib_sys_tests.rs b/aelys/tests/stdlib_sys_tests.rs deleted file mode 100644 index a524e12..0000000 --- a/aelys/tests/stdlib_sys_tests.rs +++ 
/dev/null @@ -1,296 +0,0 @@ -mod common; -use common::*; - -#[test] -fn sys_arg_count() { - let code = r#" -needs std.sys -let count = sys.arg_count() -if count >= 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sys_arg_negative_index() { - let code = r#" -needs std.sys -let a = sys.arg(-1) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_arg_out_of_bounds() { - let code = r#" -needs std.sys -let a = sys.arg(9999) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_args_returns_string() { - let code = r#" -needs std.sys -let args = sys.args() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_env_nonexistent() { - let code = r#" -needs std.sys -let val = sys.env("NONEXISTENT_VAR_12345") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_set_and_get_env() { - let code = r#" -needs std.sys -sys.set_env("AELYS_TEST_VAR", "test_value") -let val = sys.env("AELYS_TEST_VAR") -sys.unset_env("AELYS_TEST_VAR") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_unset_nonexistent_env() { - let code = r#" -needs std.sys -sys.unset_env("NONEXISTENT_999") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_env_vars_returns_string() { - let code = r#" -needs std.sys -let vars = sys.env_vars() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_pid_positive() { - let code = r#" -needs std.sys -let p = sys.pid() -if p > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sys_cwd_returns_path() { - let code = r#" -needs std.sys -let cwd = sys.cwd() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_set_cwd_invalid() { - let code = r#" -needs std.sys -sys.set_cwd("/nonexistent/path/nowhere") -"#; - assert_aelys_error_contains(code, "cannot"); -} - -#[test] -fn sys_home_returns_path() { - let code = r#" -needs std.sys -let h = sys.home() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_platform_valid() { - let code = r#" -needs std.sys -let p = 
sys.platform() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_arch_valid() { - let code = r#" -needs std.sys -let a = sys.arch() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_os_valid() { - let code = r#" -needs std.sys -let o = sys.os() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_hostname_returns_string() { - let code = r#" -needs std.sys -let h = sys.hostname() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_cpu_count_positive() { - let code = r#" -needs std.sys -let c = sys.cpu_count() -if c > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sys_exec_denied_without_capability() { - let code = r#" -needs std.sys -sys.exec("echo hi") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn sys_exec_output_denied() { - let code = r#" -needs std.sys -sys.exec_output("echo test") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn sys_exec_args_denied() { - let code = r#" -needs std.sys -sys.exec_args("ls", "-l") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn sys_exec_args_output_denied() { - let code = r#" -needs std.sys -sys.exec_args_output("echo", "test") -"#; - assert_aelys_error_contains(code, "capability"); -} - -#[test] -fn sys_random_in_range() { - let code = r#" -needs std.sys -let r = sys.random() -if r >= 0.0 and r < 1.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sys_random_int_basic() { - let code = r#" -needs std.sys -let r = sys.random_int(1, 10) -if r >= 1 and r <= 10 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn sys_random_int_min_greater_than_max() { - let code = r#" -needs std.sys -sys.random_int(10, 5) -"#; - let err = run_aelys_err(code); - assert!(err.contains("min") || err.contains("max")); -} - -#[test] -fn sys_random_int_same_bounds() { - let code = r#" -needs std.sys -let r = sys.random_int(5, 5) -if r == 5 { 1 } else { 0 } -"#; - 
assert_aelys_int(code, 1); -} - -#[test] -fn sys_script_path_returns_value() { - let code = r#" -needs std.sys -let p = sys.script_path() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_script_dir_returns_value() { - let code = r#" -needs std.sys -let d = sys.script_dir() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_multiple_env_operations() { - let code = r#" -needs std.sys -sys.set_env("TEST1", "val1") -sys.set_env("TEST2", "val2") -let v1 = sys.env("TEST1") -let v2 = sys.env("TEST2") -sys.unset_env("TEST1") -sys.unset_env("TEST2") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sys_random_different_calls() { - // Not truly a test since random might be same, but checks it works - let code = r#" -needs std.sys -let r1 = sys.random() -let r2 = sys.random() -42 -"#; - assert_aelys_int(code, 42); -} diff --git a/aelys/tests/stdlib_time_tests.rs b/aelys/tests/stdlib_time_tests.rs deleted file mode 100644 index 7e8fd38..0000000 --- a/aelys/tests/stdlib_time_tests.rs +++ /dev/null @@ -1,301 +0,0 @@ -mod common; -use common::*; - -#[test] -fn time_now_returns_reasonable_value() { - let code = r#" -let t = now() -if t > 1600000000.0 and t < 2000000000.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn time_now_ms_positive() { - let code = r#" -let t = now_ms() -if t > 0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn time_now_us_greater_than_ms() { - let code = r#" -let ms = now_ms() -let us = now_us() -if us >= ms { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn timer_and_elapsed() { - let code = r#" -let t = timer() -let e = elapsed(t) -if e >= 0.0 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn timer_elapsed_ms() { - let code = r#" -let t = timer() -sleep(10) -let e = elapsed_ms(t) -if e >= 5 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn timer_elapsed_us() { - let code = r#" -let t = timer() -let e = elapsed_us(t) -if e >= 0 { 1 
} else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn timer_reset() { - let code = r#" -let t = timer() -sleep(20) -reset(t) -let e = elapsed_ms(t) -if e < 15 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn timer_invalid_handle() { - let code = r#" -elapsed(999) -"#; - assert_aelys_error_contains(code, "invalid"); -} - -#[test] -fn reset_invalid_handle() { - let code = r#" -reset(777) -"#; - assert_aelys_error_contains(code, "invalid"); -} - -#[test] -fn sleep_zero_ms() { - let code = r#" -sleep(0) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sleep_negative_ignored() { - let code = r#" -sleep(-100) -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn sleep_us_works() { - let code = r#" -sleep_us(1000) -1 -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn year_is_current() { - let code = r#" -let y = year() -if y >= 2024 and y <= 2030 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn month_in_range() { - let code = r#" -let m = month() -if m >= 1 and m <= 12 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn day_in_range() { - let code = r#" -let d = day() -if d >= 1 and d <= 31 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn hour_in_range() { - let code = r#" -let h = hour() -if h >= 0 and h < 24 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn minute_in_range() { - let code = r#" -let m = minute() -if m >= 0 and m < 60 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn second_in_range() { - let code = r#" -let s = second() -if s >= 0 and s < 60 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn weekday_in_range() { - let code = r#" -let w = weekday() -if w >= 0 and w < 7 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn yearday_in_range() { - let code = r#" -let yd = yearday() -if yd >= 1 and yd <= 366 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn format_year() { 
- let code = r#" -let s = format("%Y") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn format_complex() { - let code = r#" -let s = format("%Y-%m-%d %H:%M:%S") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn iso_format() { - let code = r#" -let s = iso() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn date_format() { - let code = r#" -let s = date() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn time_str_format() { - let code = r#" -let s = time_str() -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn elapsed_ms_invalid_handle() { - let code = r#" -elapsed_ms(12345) -"#; - assert_aelys_error_contains(code, "invalid"); -} - -#[test] -fn elapsed_us_invalid_handle() { - let code = r#" -elapsed_us(54321) -"#; - let err = run_aelys_err(code); - assert!(err.contains("invalid") || err.contains("handle")); -} - -#[test] -fn sleep_precise() { - let code = r#" -let t = timer() -sleep(50) -let e = elapsed_ms(t) -if e >= 45 and e < 200 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn multiple_timers() { - let code = r#" -let t1 = timer() -sleep(10) -let t2 = timer() -let e1 = elapsed_ms(t1) -let e2 = elapsed_ms(t2) -if e1 > e2 { 1 } else { 0 } -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn format_percent_escape() { - let code = r#" -let s = format("100%%") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn format_weekday_name() { - let code = r#" -let s = format("%a") -42 -"#; - assert_aelys_int(code, 42); -} - -#[test] -fn format_month_name() { - let code = r#" -let s = format("%b") -42 -"#; - assert_aelys_int(code, 42); -} diff --git a/aelys/tests/string_iteration_tests.rs b/aelys/tests/string_iteration_tests.rs deleted file mode 100644 index 2869946..0000000 --- a/aelys/tests/string_iteration_tests.rs +++ /dev/null @@ -1,263 +0,0 @@ -mod common; -use common::*; - -// String Indexing Tests (stuff[i]) -#[test] -fn string_index_first_char() { - let code = r#" -let s = "hello" -let c = s[0] -c.char_len() 
-"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_index_last_char() { - let code = r#" -let s = "abcde" -let c = s[4] -c.char_len() -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_index_accumulate_with_range_for() { - let code = r#" -let s = "hello" -let mut count = 0 -for i in 0..s.char_len() { - let c = s[i] - count++ -} -count -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn string_index_returns_single_char_string() { - let code = r#" -let s = "abc" -let c = s[1] -c -"#; - assert_aelys_str(code, "b"); -} - -#[test] -fn string_index_first() { - let code = r#" -let s = "xyz" -s[0] -"#; - assert_aelys_str(code, "x"); -} - -#[test] -fn string_index_middle() { - let code = r#" -let s = "world" -s[2] -"#; - assert_aelys_str(code, "r"); -} - -// ============================================================================= -// String ForEach Iteration Tests (for letter in stuff) -// ============================================================================= - -#[test] -fn string_foreach_count_chars() { - let code = r#" -let mut count = 0 -for c in "hello" { - count++ -} -count -"#; - assert_aelys_int(code, 5); -} - -#[test] -fn string_foreach_with_variable() { - let code = r#" -let stuff = "abcdef" -let mut count = 0 -for letter in stuff { - count++ -} -count -"#; - assert_aelys_int(code, 6); -} - -#[test] -fn string_foreach_empty_string() { - let code = r#" -let mut count = 0 -for c in "" { - count++ -} -count -"#; - assert_aelys_int(code, 0); -} - -#[test] -fn string_foreach_single_char() { - let code = r#" -let mut count = 0 -for c in "x" { - count++ -} -count -"#; - assert_aelys_int(code, 1); -} - -#[test] -fn string_foreach_first_char() { - let code = r#" -let mut result = "" -for c in "hello" { - result = c - break -} -result -"#; - assert_aelys_str(code, "h"); -} - -#[test] -fn string_foreach_in_function() { - let code = r#" -fn count_chars(s: string) -> int { - let mut n = 0 - for c in s { - n++ - } - return n -} -count_chars("testing") 
-"#; - assert_aelys_int(code, 7); -} - -#[test] -fn string_foreach_nested_in_range_for() { - let code = r#" -let mut total = 0 -for i in 0..3 { - for c in "ab" { - total++ - } -} -total -"#; - assert_aelys_int(code, 6); -} - -#[test] -fn string_foreach_with_conditional() { - let code = r#" -let mut count = 0 -for c in "aAbBcC" { - if c == "a" or c == "b" or c == "c" { - count++ - } -} -count -"#; - assert_aelys_int(code, 3); -} - -// ============================================================================= -// String Indexing + ForEach Consistency -// ============================================================================= - -#[test] -fn string_index_and_foreach_same_count() { - let code = r#" -let s = "hello world" -let mut count_index = 0 -for i in 0..s.char_len() { - let c = s[i] - count_index++ -} -let mut count_foreach = 0 -for c in s { - count_foreach++ -} -if count_index == count_foreach { count_index } else { -1 } -"#; - assert_aelys_int(code, 11); -} - -// ============================================================================= -// Unicode Handling -// ============================================================================= - -#[test] -fn string_foreach_unicode_accented() { - let code = r#" -let mut count = 0 -for c in "café" { - count++ -} -count -"#; - assert_aelys_int(code, 4); -} - -#[test] -fn string_foreach_unicode_multibyte() { - let code = r#" -let mut count = 0 -for c in "héllo" { - count++ -} -count -"#; - assert_aelys_int(code, 5); -} - -// ============================================================================= -// ForEach with break -// ============================================================================= - -#[test] -fn string_foreach_break_early() { - let code = r#" -let mut count = 0 -for c in "abcdefgh" { - count++ - if count == 3 { - break - } -} -count -"#; - assert_aelys_int(code, 3); -} - -// ============================================================================= -// Indexing on string literal -// 
============================================================================= - -#[test] -fn string_literal_index() { - let code = r#" -"hello"[0] -"#; - assert_aelys_str(code, "h"); -} - -#[test] -fn string_literal_index_last() { - let code = r#" -"world"[4] -"#; - assert_aelys_str(code, "d"); -} diff --git a/aelys/tests/test_opt/combined_opt.aelys b/aelys/tests/test_opt/combined_opt.aelys deleted file mode 100644 index 24949d1..0000000 --- a/aelys/tests/test_opt/combined_opt.aelys +++ /dev/null @@ -1,65 +0,0 @@ -needs std.io -needs std.convert - -// Constantes globales -let MAX_ITERATIONS = 100 -let THRESHOLD = 0.001 - -// Test: constant folding + propagation -let COMPUTED = (MAX_ITERATIONS / 2) + 10 // devrait devenir 60 - -fn fibonacci_limit() -> int { - // Devrait propager MAX_ITERATIONS et folder l'expression - return MAX_ITERATIONS - 10 // devrait devenir 90 -} - -// Test: toutes les optimisations -fn complex_test(n: int) -> int { - // Constant folding - let base = 2 * 3 * 4 // devrait devenir 24 - - // Global const propagation - let limit = MAX_ITERATIONS // devrait propager 100 - - // Dead code elimination - if false { - return -999 // dead code - } - - // Expression mixte - let result = base + (limit / 10) + n // 24 + 10 + n = 34 + n - - // Return avec calcul - return result -} - -// Test d'expression arithmétique complexe -fn arithmetic_chain() -> int { - // tout ceci devrait être foldé en une seule constante - let a = 1 + 2 // 3 - let b = a * 3 // 9 (si propagation locale) - let c = 10 + 20 + 30 // 60 - let d = c / 6 // 10 - let e = 100 - 50 - 25 // 25 - - return (2 + 3) * (4 + 6) // devrait devenir 50 -} - -// Test de conditions constantes -fn const_conditions() -> int { - // Ces conditions devraient être évaluées à la compilation - if 10 > 5 { // toujours vrai - if 3 < 2 { // toujours faux - return 0 // dead code - } - return 42 // seul code exécuté - } - return -1 // dead code -} - -io.print("Combined optimization test:") -io.print("COMPUTED = " + 
convert.to_string(COMPUTED)) -io.print("fibonacci_limit() = " + convert.to_string(fibonacci_limit())) -io.print("complex_test(6) = " + convert.to_string(complex_test(6))) -io.print("arithmetic_chain() = " + convert.to_string(arithmetic_chain())) -io.print("const_conditions() = " + convert.to_string(const_conditions())) diff --git a/aelys/tests/test_opt/constant_folding.aelys b/aelys/tests/test_opt/constant_folding.aelys deleted file mode 100644 index 85368eb..0000000 --- a/aelys/tests/test_opt/constant_folding.aelys +++ /dev/null @@ -1,48 +0,0 @@ -needs std.io -needs std.convert - -// Arithmétique entière simple -let a = 2 + 3 // devrait devenir 5 -let b = 10 - 4 // devrait devenir 6 -let c = 6 * 7 // devrait devenir 42 -let d = 100 / 5 // devrait devenir 20 -let e = 17 % 5 // devrait devenir 2 - -// Expressions imbriquées -let f = (2 + 3) * (4 + 1) // devrait devenir 25 -let g = 100 / (2 + 3) // devrait devenir 20 -let h = (10 - 5) * (8 / 2) // devrait devenir 20 - -// Opérations flottantes -let pi_approx = 22.0 / 7.0 -let area = 3.14159 * 10.0 * 10.0 // devrait être précalculé - -// Opérations logiques -let l1 = true and true // devrait devenir true -let l2 = true and false // devrait devenir false -let l3 = false or true // devrait devenir true -let l4 = not false // devrait devenir true - -// Comparaisons constantes -let cmp1 = 5 > 3 // devrait devenir true -let cmp2 = 10 == 10 // devrait devenir true -let cmp3 = 7 <= 7 // devrait devenir true -let cmp4 = 3 != 3 // devrait devenir false - -// Concaténation de strings (si supporté) -let s = "Hello, " + "World!" 
- -// Opérations unaires -let neg = -42 -let neg2 = -(10 + 5) // devrait devenir -15 - -io.print("Constant folding test:") -io.print("a = " + convert.to_string(a)) -io.print("b = " + convert.to_string(b)) -io.print("c = " + convert.to_string(c)) -io.print("d = " + convert.to_string(d)) -io.print("e = " + convert.to_string(e)) -io.print("f = " + convert.to_string(f)) -io.print("g = " + convert.to_string(g)) -io.print("h = " + convert.to_string(h)) -io.print("s = " + s) diff --git a/aelys/tests/test_opt/dead_code.aelys b/aelys/tests/test_opt/dead_code.aelys deleted file mode 100644 index 748414a..0000000 --- a/aelys/tests/test_opt/dead_code.aelys +++ /dev/null @@ -1,56 +0,0 @@ -needs std.io -needs std.convert - -// Code inaccessible après return -fn test_return(x: int) -> int { - return x * 2 - let dead = x + 100 // dead code - return dead // dead code -} - -// Branches toujours fausses -fn test_false_branch() { - if false { - io.print("This should never print") // dead code - } - - if true { - io.print("Always executed") - } -} - -// Variables non utilisées -fn test_unused() { - let unused1 = 42 // devrait être éliminé - let unused2 = 100 + 200 // devrait être éliminé - let used = 10 - io.print("used = " + convert.to_string(used)) -} - -// Conditions constantes -fn test_const_conditions() { - let x = 10 - - // Cette condition est constante après propagation - if 5 > 3 { - io.print("5 > 3 is always true") - } - - if 2 > 5 { - io.print("This is dead code") // dead code - } -} - -fn test_empty_loop() { - for i in 0..0 { - io.print("Never executed") // dead code (empty range) - } -} - -io.print("Dead code elimination test:") -io.print("test_return(5) = " + convert.to_string(test_return(5))) -test_false_branch() -test_unused() -test_const_conditions() -test_empty_loop() -io.print("Done!") diff --git a/aelys/tests/test_opt/for_loop_test.aelys b/aelys/tests/test_opt/for_loop_test.aelys deleted file mode 100644 index bf99e3c..0000000 --- 
a/aelys/tests/test_opt/for_loop_test.aelys +++ /dev/null @@ -1,26 +0,0 @@ -needs std.io -needs std.convert - -io.print("Testing for loop ranges:") - -io.print("for i in 0..0 (should print nothing):") -for i in 0..0 { - io.print(" iteration: " + convert.to_string(i)) -} - -io.print("for i in 0..1 (should print 0):") -for i in 0..1 { - io.print(" iteration: " + convert.to_string(i)) -} - -io.print("for i in 0..3 (should print 0,1,2):") -for i in 0..3 { - io.print(" iteration: " + convert.to_string(i)) -} - -io.print("for i in 5..5 (should print nothing):") -for i in 5..5 { - io.print(" iteration: " + convert.to_string(i)) -} - -io.print("Done!") diff --git a/aelys/tests/test_opt/global_const_prop.aelys b/aelys/tests/test_opt/global_const_prop.aelys deleted file mode 100644 index 7aede6a..0000000 --- a/aelys/tests/test_opt/global_const_prop.aelys +++ /dev/null @@ -1,43 +0,0 @@ -needs std.io -needs std.convert - -// Constantes globales -let WIDTH = 800 -let HEIGHT = 600 -let ASPECT = WIDTH / HEIGHT -let HALF_WIDTH = WIDTH / 2 -let HALF_HEIGHT = HEIGHT / 2 - -// Utilisation des constantes -let total_pixels = WIDTH * HEIGHT // devrait devenir 480000 -let center_x = HALF_WIDTH // devrait propager 400 -let center_y = HALF_HEIGHT // devrait propager 300 -let double_width = WIDTH * 2 // devrait devenir 1600 - -// Constantes avec calculs -let PI = 3.14159 -let RADIUS = 10.0 -let CIRCLE_AREA = PI * RADIUS * RADIUS // devrait être précalculé - -// Propagation en cascade -let BASE = 100 -let LEVEL1 = BASE * 2 // 200 -let LEVEL2 = LEVEL1 + 50 // 250 -let LEVEL3 = LEVEL2 * 2 // 500 - -fn calculate_area(r: float) -> float { - // PI devrait être propagé ici - return PI * r * r -} - -fn screen_center() -> int { - // WIDTH et HEIGHT devraient être propagés - return (WIDTH / 2) + (HEIGHT / 2) -} - -io.print("Global constant propagation test:") -io.print("WIDTH = " + convert.to_string(WIDTH)) -io.print("HEIGHT = " + convert.to_string(HEIGHT)) -io.print("total_pixels = " + 
convert.to_string(total_pixels)) -io.print("LEVEL3 = " + convert.to_string(LEVEL3)) -io.print("screen_center() = " + convert.to_string(screen_center())) diff --git a/aelys/tests/type_system_tests.rs b/aelys/tests/type_system_tests.rs deleted file mode 100644 index 1fffcb9..0000000 --- a/aelys/tests/type_system_tests.rs +++ /dev/null @@ -1,594 +0,0 @@ -use aelys::run; -use aelys_runtime::Value; - -/// Helper to run code and expect success -fn run_ok(source: &str) -> Value { - run(source, "test.aelys").expect("Expected program to run successfully") -} - -/// Helper to run code and expect an error -#[allow(dead_code)] -fn run_err(source: &str) -> String { - run(source, "test.aelys") - .expect_err("Expected program to fail") - .to_string() -} - -#[test] -fn test_let_with_int_type() { - let result = run_ok( - r#" - let x: int = 42 - x - "#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -#[allow(clippy::approx_constant)] -fn test_let_with_float_type() { - let result = run_ok( - r#" - let x: float = 3.14 - x - "#, - ); - assert!((result.as_float().unwrap() - 3.14).abs() < 0.001); -} - -#[test] -fn test_let_with_bool_type() { - let result = run_ok( - r#" - let x: bool = true - x - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_let_with_string_type() { - let result = run_ok( - r#" - let x: string = "hello" - x - "#, - ); - // String comparison would need heap access, just verify it runs - assert!(result.as_ptr().is_some()); -} - -#[test] -fn test_let_mutable_with_type() { - let result = run_ok( - r#" - let mut x: int = 10 - x += 5 - x - "#, - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_function_with_typed_params() { - let result = run_ok( - r#" - fn add(a: int, b: int) { - return a + b - } - add(3, 4) - "#, - ); - assert_eq!(result.as_int(), Some(7)); -} - -#[test] -fn test_function_with_return_type() { - let result = run_ok( - r#" - fn square(x: int) -> int { - return x * x - } - square(5) - "#, - ); - 
assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_function_with_typed_params_and_return() { - let result = run_ok( - r#" - fn multiply(a: int, b: int) -> int { - return a * b - } - multiply(6, 7) - "#, - ); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_function_void_return_type() { - let result = run_ok( - r#" - let mut counter: int = 0 - - fn increment() -> void { - counter++ - } - - increment() - increment() - counter - "#, - ); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_lambda_with_typed_params() { - let result = run_ok( - r#" - let add = fn(a: int, b: int) { return a + b } - add(10, 20) - "#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_lambda_with_return_type() { - let result = run_ok( - r#" - let double = fn(x: int) -> int { return x * 2 } - double(15) - "#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_infer_int_literal() { - let result = run_ok( - r#" - let x = 100 - x + 50 - "#, - ); - assert_eq!(result.as_int(), Some(150)); -} - -#[test] -fn test_infer_float_literal() { - let result = run_ok( - r#" - let x = 2.5 - x * 4.0 - "#, - ); - assert!((result.as_float().unwrap() - 10.0).abs() < 0.001); -} - -#[test] -fn test_infer_bool_literal() { - let result = run_ok( - r#" - let x = true - let y = false - x and not y - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_infer_from_binary_op() { - let result = run_ok( - r#" - let sum = 10 + 20 - let diff = sum - 5 - diff - "#, - ); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_int_addition_specialized() { - // This should emit AddII opcode - let result = run_ok( - r#" - let a: int = 100 - let b: int = 200 - a + b - "#, - ); - assert_eq!(result.as_int(), Some(300)); -} - -#[test] -fn test_int_subtraction_specialized() { - // This should emit SubII opcode - let result = run_ok( - r#" - let a: int = 500 - let b: int = 123 - a - b - "#, - ); - assert_eq!(result.as_int(), 
Some(377)); -} - -#[test] -fn test_int_multiplication_specialized() { - // This should emit MulII opcode - let result = run_ok( - r#" - let a: int = 12 - let b: int = 11 - a * b - "#, - ); - assert_eq!(result.as_int(), Some(132)); -} - -#[test] -fn test_int_division_specialized() { - // This should emit DivII opcode - let result = run_ok( - r#" - let a: int = 100 - let b: int = 4 - a / b - "#, - ); - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_int_modulo_specialized() { - // This should emit ModII opcode - let result = run_ok( - r#" - let a: int = 17 - let b: int = 5 - a % b - "#, - ); - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_int_comparison_lt_specialized() { - // This should emit LtII opcode - let result = run_ok( - r#" - let a: int = 10 - let b: int = 20 - a < b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_int_comparison_le_specialized() { - // This should emit LeII opcode - let result = run_ok( - r#" - let a: int = 10 - let b: int = 10 - a <= b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_int_comparison_gt_specialized() { - // This should emit GtII opcode - let result = run_ok( - r#" - let a: int = 30 - let b: int = 20 - a > b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_int_comparison_ge_specialized() { - // This should emit GeII opcode - let result = run_ok( - r#" - let a: int = 20 - let b: int = 20 - a >= b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_int_equality_specialized() { - // This should emit EqII opcode - let result = run_ok( - r#" - let a: int = 42 - let b: int = 42 - a == b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_int_not_equal_specialized() { - // This should emit NeII opcode - let result = run_ok( - r#" - let a: int = 42 - let b: int = 43 - a != b - "#, - ); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn 
test_float_addition_specialized() { - // This should emit AddFF opcode - let result = run_ok( - r#" - let a: float = 1.5 - let b: float = 2.5 - a + b - "#, - ); - assert!((result.as_float().unwrap() - 4.0).abs() < 0.001); -} - -#[test] -fn test_float_subtraction_specialized() { - // This should emit SubFF opcode - let result = run_ok( - r#" - let a: float = 10.0 - let b: float = 3.5 - a - b - "#, - ); - assert!((result.as_float().unwrap() - 6.5).abs() < 0.001); -} - -#[test] -fn test_float_multiplication_specialized() { - // This should emit MulFF opcode - let result = run_ok( - r#" - let a: float = 2.5 - let b: float = 4.0 - a * b - "#, - ); - assert!((result.as_float().unwrap() - 10.0).abs() < 0.001); -} - -#[test] -fn test_float_division_specialized() { - // This should emit DivFF opcode - let result = run_ok( - r#" - let a: float = 15.0 - let b: float = 3.0 - a / b - "#, - ); - assert!((result.as_float().unwrap() - 5.0).abs() < 0.001); -} - -#[test] -fn test_int_float_mixed_arithmetic() { - // When types are mixed, the generic opcodes should be used - let result = run_ok( - r#" - let a: int = 5 - let b: float = 2.5 - a + b - "#, - ); - assert!((result.as_float().unwrap() - 7.5).abs() < 0.001); -} - -#[test] -fn test_inferred_types_in_loop() { - // Loop counter should be inferred as int - let result = run_ok( - r#" - let mut sum = 0 - let mut i = 0 - while i < 10 { - sum += i - i++ - } - sum - "#, - ); - assert_eq!(result.as_int(), Some(45)); -} - -#[test] -fn test_typed_loop_counter() { - let result = run_ok( - r#" - let mut sum: int = 0 - let mut i: int = 0 - while i < 5 { - sum += i * i - i++ - } - sum - "#, - ); - // 0 + 1 + 4 + 9 + 16 = 30 - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_for_loop_with_typed_bounds() { - let result = run_ok( - r#" - let mut sum: int = 0 - for i in 1..6 { - sum += i - } - sum - "#, - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_nested_function_with_types() { - let result = run_ok( - r#" - 
fn outer(x: int) -> int { - fn inner(y: int) -> int { - return y * 2 - } - return inner(x) + 1 - } - outer(10) - "#, - ); - assert_eq!(result.as_int(), Some(21)); -} - -#[test] -fn test_closure_with_typed_capture() { - let result = run_ok( - r#" - fn make_adder(x: int) { - return fn(y: int) -> int { - return x + y - } - } - let add10 = make_adder(10) - add10(5) - "#, - ); - assert_eq!(result.as_int(), Some(15)); -} - -#[test] -fn test_recursive_function_with_types() { - let result = run_ok( - r#" - fn factorial(n: int) -> int { - if n <= 1 { - return 1 - } - return n * factorial(n - 1) - } - factorial(5) - "#, - ); - assert_eq!(result.as_int(), Some(120)); -} - -#[test] -fn test_multiple_typed_functions() { - let result = run_ok( - r#" - fn square(x: int) -> int { - return x * x - } - - fn cube(x: int) -> int { - return x * square(x) - } - - cube(3) - "#, - ); - assert_eq!(result.as_int(), Some(27)); -} - -#[test] -fn test_various_int_types() { - let result = run_ok( - r#" - let a: int = 10 - let b: int = 20 - let c: int = 30 - let d: int64 = 40 - a + b + c + d - "#, - ); - assert_eq!(result.as_int(), Some(100)); -} - -#[test] -fn test_various_uint_types() { - let result = run_ok( - r#" - let a: int = 10 - let b: int = 20 - a + b - "#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_various_float_types() { - let result = run_ok( - r#" - let a: float = 1.5 - let b: float64 = 2.5 - a + b - "#, - ); - assert!((result.as_float().unwrap() - 4.0).abs() < 0.001); -} - -#[test] -fn test_function_without_type_annotations() { - let result = run_ok( - r#" - fn add(a, b) { - return a + b - } - add(10, 20) - "#, - ); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_let_without_type_annotation() { - let result = run_ok( - r#" - let x = 42 - let y = x + 8 - y - "#, - ); - assert_eq!(result.as_int(), Some(50)); -} - -#[test] -fn test_lambda_without_type_annotations() { - let result = run_ok( - r#" - let double = fn(x) { return x * 2 } - 
double(25) - "#, - ); - assert_eq!(result.as_int(), Some(50)); -} diff --git a/aelys/tests/value_test.rs b/aelys/tests/value_test.rs deleted file mode 100644 index 8fc13d8..0000000 --- a/aelys/tests/value_test.rs +++ /dev/null @@ -1,81 +0,0 @@ -use aelys_runtime::Value; - -#[test] -fn test_int_roundtrip() { - for n in [ - 0i64, - 1, - -1, - 42, - -42, - 1000000, - -1000000, - i64::MAX >> 16, - i64::MIN >> 16, - ] { - let v = Value::int(n); - assert!(v.is_int(), "Expected int for {}", n); - assert_eq!(v.as_int(), Some(n), "Roundtrip failed for {}", n); - } -} - -#[test] -fn test_float_roundtrip() { - for n in [ - 0.0f64, - 1.0, - -1.0, - std::f64::consts::PI, - f64::MAX, - f64::MIN, - f64::INFINITY, - ] { - let v = Value::float(n); - assert!(v.is_float(), "Expected float for {}", n); - assert_eq!(v.as_float(), Some(n), "Roundtrip failed for {}", n); - } -} - -#[test] -fn test_bool_roundtrip() { - let t = Value::bool(true); - let f = Value::bool(false); - - assert!(t.is_bool()); - assert!(f.is_bool()); - assert_eq!(t.as_bool(), Some(true)); - assert_eq!(f.as_bool(), Some(false)); -} - -#[test] -fn test_null() { - let n = Value::null(); - assert!(n.is_null()); - assert!(!n.is_int()); - assert!(!n.is_float()); - assert!(!n.is_bool()); -} - -#[test] -fn test_type_discrimination() { - let int = Value::int(42); - let float = Value::float(2.72); - let boolean = Value::bool(true); - let null = Value::null(); - - assert!(int.is_int() && !int.is_float() && !int.is_bool() && !int.is_null()); - assert!(!float.is_int() && float.is_float() && !float.is_bool() && !float.is_null()); - assert!(!boolean.is_int() && !boolean.is_float() && boolean.is_bool() && !boolean.is_null()); - assert!(!null.is_int() && !null.is_float() && !null.is_bool() && null.is_null()); -} - -#[test] -fn test_is_truthy() { - assert!(Value::bool(true).is_truthy()); - assert!(!Value::bool(false).is_truthy()); - assert!(!Value::null().is_truthy()); - assert!(Value::int(1).is_truthy()); - 
assert!(!Value::int(0).is_truthy()); - assert!(Value::float(1.0).is_truthy()); - assert!(!Value::float(0.0).is_truthy()); -} diff --git a/aelys/tests/vm_argument_tests.rs b/aelys/tests/vm_argument_tests.rs deleted file mode 100644 index 0d3845c..0000000 --- a/aelys/tests/vm_argument_tests.rs +++ /dev/null @@ -1,179 +0,0 @@ -use aelys_common::RuntimeErrorKind; -use aelys_runtime::{VM, VmArgsError, VmConfig, VmConfigError, parse_vm_args}; -use aelys_syntax::Source; - -#[test] -fn parse_vm_args_default() { - let parsed = parse_vm_args(&[]).expect("should parse defaults"); - assert_eq!( - parsed.config.max_heap_bytes, - VmConfig::DEFAULT_MAX_HEAP_BYTES - ); - assert!(!parsed.config.capabilities.allow_fs); - assert!(!parsed.config.capabilities.allow_net); - assert!(!parsed.config.capabilities.allow_exec); - assert!(parsed.program_args.is_empty()); -} - -#[test] -fn parse_vm_args_dev_flag_enables_hot_reload() { - let parsed = parse_vm_args(&["--dev".to_string()]).expect("should parse"); - assert!(parsed.config.allow_hot_reload); -} - -#[test] -fn parse_vm_args_allow_deny_caps() { - let parsed = parse_vm_args(&[ - "--allow-caps=net,gpu".to_string(), - "--deny-caps=exec".to_string(), - ]) - .expect("should parse"); - assert!(parsed.config.allowed_caps.contains("net")); - assert!(parsed.config.allowed_caps.contains("gpu")); - assert!(parsed.config.denied_caps.contains("exec")); -} - -#[test] -fn jvm_style_max_heap_too_small() { - let err = parse_vm_args(&["-ae.max-heap=4096".to_string()]) - .err() - .expect("should fail"); - match err { - VmArgsError::InvalidValue { reason, .. } => { - assert!(reason.contains("must be >=")); - } - VmArgsError::InvalidConfig(VmConfigError::MaxHeapTooSmall { .. 
}) => {} - _ => panic!("unexpected error: {:?}", err), - } -} - -#[test] -fn gnu_style_max_heap() { - let parsed = parse_vm_args(&["--ae-max-heap=1G".to_string(), "script.aelys".to_string()]) - .expect("should parse"); - assert_eq!(parsed.config.max_heap_bytes, 1024 * 1024 * 1024); - assert_eq!(parsed.program_args, vec!["script.aelys".to_string()]); -} - -#[test] -fn invalid_vm_arg_value_errors() { - let err = parse_vm_args(&["-ae.max-heap=not-a-number".to_string()]) - .err() - .expect("should error"); - match err { - VmArgsError::InvalidValue { reason, .. } => { - assert!(reason.contains("invalid integer")); - } - _ => panic!("unexpected error"), - } -} - -#[test] -fn parse_vm_args_capabilities_flags() { - let parsed = parse_vm_args(&[ - "-ae.allow-fs=true".to_string(), - "-ae.allow-net=false".to_string(), - "-ae.allow-exec=true".to_string(), - ]) - .expect("should parse capabilities"); - assert!(parsed.config.capabilities.allow_fs); - assert!(!parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); -} - -#[test] -fn trusted_overrides_capabilities() { - let parsed = parse_vm_args(&[ - "-ae.trusted=true".to_string(), - "-ae.allow-fs=false".to_string(), - "-ae.allow-net=false".to_string(), - "-ae.allow-exec=false".to_string(), - ]) - .expect("should parse trusted override"); - assert!(parsed.config.capabilities.allow_fs); - assert!(parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); -} - -#[test] -fn invalid_capability_value_errors() { - let err = parse_vm_args(&["-ae.allow-fs=maybe".to_string()]) - .err() - .expect("should error"); - match err { - VmArgsError::InvalidValue { reason, .. 
} => { - assert!(reason.contains("expected true or false")); - } - _ => panic!("unexpected error"), - } -} - -#[test] -fn trusted_clears_allowed_caps() { - // Regression test: trusted should clear allowed_caps, not just denied_caps - let parsed = parse_vm_args(&[ - "--allow-caps=net".to_string(), - "-ae.trusted=true".to_string(), - ]) - .expect("should parse"); - - // With trusted=true, allowed_caps should be empty (allowing everything) - assert!(parsed.config.allowed_caps.is_empty()); - assert!(parsed.config.denied_caps.is_empty()); - - // All capabilities should be enabled - assert!(parsed.config.capabilities.allow_fs); - assert!(parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); - - // Native caps check should allow any capability - assert!(parsed.config.check_native_capability("fs").is_ok()); - assert!(parsed.config.check_native_capability("net").is_ok()); - assert!(parsed.config.check_native_capability("gpu").is_ok()); - assert!(parsed.config.check_native_capability("anything").is_ok()); -} - -#[test] -fn heap_limit_triggers_out_of_memory() { - let config = VmConfig::new(1024 * 1024).expect("valid config"); - let src = Source::new("", ""); - let mut vm = VM::with_config_and_args(src, config.clone(), Vec::new()).expect("vm init"); - - let large = "a".repeat(2 * 1024 * 1024); - let err = vm.alloc_string(&large).expect_err("should fail"); - match err.kind { - RuntimeErrorKind::OutOfMemory { .. } => {} - _ => panic!("expected OutOfMemory"), - } -} - -#[test] -fn manual_heap_respects_limit() { - let config = VmConfig::new(2 * 1024 * 1024).expect("valid config"); - let src = Source::new("", ""); - let mut vm = VM::with_config_and_args(src, config.clone(), Vec::new()).expect("vm init"); - - let slots = (config.max_heap_bytes as usize / std::mem::size_of::()) + 1; - let err = vm.manual_alloc(slots, 0).expect_err("should OOM"); - match err.kind { - RuntimeErrorKind::OutOfMemory { .. 
} => {} - _ => panic!("expected OutOfMemory"), - } -} - -#[test] -fn merge_heap_rejects_over_limit() { - let config = VmConfig::new(2 * 1024 * 1024).expect("valid config"); - let src = Source::new("", ""); - let mut vm = VM::with_config_and_args(src, config.clone(), Vec::new()).expect("vm init"); - - let mut compile_heap = aelys_runtime::Heap::new(); - let large = "x".repeat(2 * 1024 * 1024); - compile_heap.alloc_string(&large); - - let err = vm.merge_heap(&mut compile_heap).expect_err("should OOM"); - match err.kind { - RuntimeErrorKind::OutOfMemory { .. } => {} - _ => panic!("expected OutOfMemory"), - } -} diff --git a/aelys/tests/vm_runtime_tests.rs b/aelys/tests/vm_runtime_tests.rs deleted file mode 100644 index e5f9d65..0000000 --- a/aelys/tests/vm_runtime_tests.rs +++ /dev/null @@ -1,24 +0,0 @@ -use aelys_backend::Compiler; -use aelys_frontend::{lexer::Lexer, parser::Parser}; -use aelys_runtime::{VM, VmConfig}; -use aelys_sema::TypeInference; -use aelys_syntax::Source; - -#[test] -fn vm_executes_bytecode() { - let src = Source::new("", "fn f() -> int { 1 } f()"); - let tokens = Lexer::with_source(src.clone()).scan().unwrap(); - let ast = Parser::new(tokens, src.clone()).parse().unwrap(); - let typed = TypeInference::infer_program(ast, src.clone()).unwrap(); - let (mut func, mut heap, _) = Compiler::new(None, src.clone()) - .compile_typed(&typed) - .unwrap(); - - let mut vm = VM::with_config_and_args(src, VmConfig::default(), vec![]).unwrap(); - let remap = vm.merge_heap(&mut heap).unwrap(); - func.remap_constants(&remap); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.to_string(), "1"); -} diff --git a/aelys/tests/vm_tests.rs b/aelys/tests/vm_tests.rs deleted file mode 100644 index 856c205..0000000 --- a/aelys/tests/vm_tests.rs +++ /dev/null @@ -1,1032 +0,0 @@ -//! 
Tests for the Aelys VM - -use aelys_common::{RuntimeError, RuntimeErrorKind}; -use aelys_runtime::{CallFrame, Function, GlobalLayout, MAX_FRAMES, OpCode, VM, Value}; -use aelys_syntax::Source; -use std::sync::Arc; - -fn make_test_source() -> Arc { - Source::new("test.aelys", "fn test() { }") -} - -#[test] -fn test_vm_creation() { - let source = make_test_source(); - let vm = VM::new(source).unwrap(); - - assert_eq!(vm.frame_count(), 0); - // VM now pre-allocates 32768 registers for performance - assert_eq!(vm.register_count(), 32768); - assert!(!vm.is_in_no_gc()); -} - -#[test] -fn test_push_pop_frame() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 5; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - - vm.push_frame(frame).unwrap(); - assert_eq!(vm.frame_count(), 1); - - let popped = vm.pop_frame(); - assert!(popped.is_some()); - assert_eq!(vm.frame_count(), 0); -} - -#[test] -fn test_frame_stack_overflow() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 5; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - // Push MAX_FRAMES frames - for _ in 0..MAX_FRAMES { - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame).unwrap(); - } - - // Next push should fail - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - let result = vm.push_frame(frame); - assert!(result.is_err()); -} - -#[test] -fn test_read_write_register() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = 
Function::new(Some("test".to_string()), 0); - func.num_registers = 10; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame).unwrap(); - - // Write and read - vm.write_register(0, Value::int(42)).unwrap(); - assert_eq!(vm.read_register(0).unwrap().as_int(), Some(42)); - - vm.write_register(5, Value::bool(true)).unwrap(); - assert_eq!(vm.read_register(5).unwrap().as_bool(), Some(true)); -} - -#[test] -fn test_windowed_registers() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // First frame at base 0 - let mut func1 = Function::new(Some("func1".to_string()), 0); - func1.num_registers = 5; - let func1_ref = vm.alloc_function(func1).unwrap(); - - let frame1 = CallFrame::new(func1_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame1).unwrap(); - - vm.write_register(0, Value::int(100)).unwrap(); - vm.write_register(1, Value::int(200)).unwrap(); - - // Second frame at base 5 - let mut func2 = Function::new(Some("func2".to_string()), 0); - func2.num_registers = 3; - let func2_ref = vm.alloc_function(func2).unwrap(); - - let frame2 = CallFrame::new(func2_ref, 5, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame2).unwrap(); - - // These writes go to different slots - vm.write_register(0, Value::int(10)).unwrap(); - vm.write_register(1, Value::int(20)).unwrap(); - - // Current frame sees 10, 20 - assert_eq!(vm.read_register(0).unwrap().as_int(), Some(10)); - assert_eq!(vm.read_register(1).unwrap().as_int(), Some(20)); - - // Pop back to first frame - vm.pop_frame(); - - // First frame still has 100, 200 - assert_eq!(vm.read_register(0).unwrap().as_int(), Some(100)); - assert_eq!(vm.read_register(1).unwrap().as_int(), Some(200)); -} - -#[test] -fn test_global_variables() { - let source = make_test_source(); - let mut vm = 
VM::new(source).unwrap(); - - assert!(vm.get_global("x").is_none()); - - vm.set_global("x".to_string(), Value::int(42)); - assert_eq!(vm.get_global("x"), Some(Value::int(42))); - - vm.set_global("x".to_string(), Value::bool(true)); - assert_eq!(vm.get_global("x"), Some(Value::bool(true))); -} - -#[test] -fn test_no_gc_depth() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - assert_eq!(vm.no_gc_depth(), 0); - assert!(!vm.is_in_no_gc()); - - vm.enter_no_gc(); - assert_eq!(vm.no_gc_depth(), 1); - assert!(vm.is_in_no_gc()); - - vm.enter_no_gc(); - assert_eq!(vm.no_gc_depth(), 2); - assert!(vm.is_in_no_gc()); - - vm.exit_no_gc(); - assert_eq!(vm.no_gc_depth(), 1); - assert!(vm.is_in_no_gc()); - - vm.exit_no_gc(); - assert_eq!(vm.no_gc_depth(), 0); - assert!(!vm.is_in_no_gc()); - - // Extra exits should saturate at 0 - vm.exit_no_gc(); - assert_eq!(vm.no_gc_depth(), 0); -} - -#[test] -fn test_maybe_collect_respects_no_gc() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Capture baseline: builtins + auto-registered stdlib functions are rooted - vm.collect(); - let baseline = vm.heap().object_count(); - - // Allocate strings to fill heap past threshold - for i in 0..10000 { - vm.alloc_string(&format!("string_number_{}", i)).unwrap(); - } - - let objects_before = vm.heap().object_count(); - assert!(objects_before > baseline); - - // Enter no_gc - collection should be suppressed even if threshold is reached - vm.enter_no_gc(); - vm.maybe_collect(); - - // Objects should not be collected - assert_eq!(vm.heap().object_count(), objects_before); - - // Exit no_gc and force collection - vm.exit_no_gc(); - vm.collect(); - - // After GC, only rooted globals survive (builtins + auto-registered stdlib) - assert_eq!(vm.heap().object_count(), baseline); -} - -#[test] -fn test_collect_marks_registers() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create some strings - let str1 = 
vm.alloc_string("keep me").unwrap(); - let str2 = vm.alloc_string("free me").unwrap(); - - // Push a frame and store str1 in a register - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 5; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - // num_registers must match the function's register count for GC to mark them - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 5); - vm.push_frame(frame).unwrap(); - - vm.write_register(0, Value::ptr(str1.index())).unwrap(); - - // str2 is not rooted anywhere - // Force collection - vm.collect(); - - // str1 should be kept, str2 should be freed - assert!(vm.heap().get(str1).is_some()); - assert!(vm.heap().get(str2).is_none()); -} - -#[test] -fn test_collect_marks_globals() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let str1 = vm.alloc_string("global string").unwrap(); - let str2 = vm.alloc_string("unreachable").unwrap(); - - vm.set_global("my_str".to_string(), Value::ptr(str1.index())); - - vm.collect(); - - assert!(vm.heap().get(str1).is_some()); - assert!(vm.heap().get(str2).is_none()); -} - -#[test] -fn test_runtime_error_with_stack_trace() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("test_func".to_string()), 2); - func.num_registers = 10; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame).unwrap(); - - let error = vm.runtime_error(RuntimeErrorKind::DivisionByZero); - - assert!(matches!(error.kind, RuntimeErrorKind::DivisionByZero)); - assert!(!error.stack_trace.is_empty()); -} -#[test] -fn test_current_frame_methods() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = 
Function::new(Some("test".to_string()), 0); - func.num_registers = 5; - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - - let frame = CallFrame::new(func_ref, 0, std::ptr::null(), 0, std::ptr::null(), 0, 0); - vm.push_frame(frame).unwrap(); - - assert_eq!(vm.current_frame().unwrap().ip(), 0); - - vm.current_frame_mut().unwrap().advance_ip(); - assert_eq!(vm.current_frame().unwrap().ip(), 1); - - vm.current_frame_mut().unwrap().set_ip(42); - assert_eq!(vm.current_frame().unwrap().ip(), 42); -} - -// VM Execution Tests - -#[test] -fn test_execute_simple_return() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that returns 42 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 - func.emit_a(OpCode::Return, 0, 0, 0, 1); // return r0 - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_execute_arithmetic() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that computes 10 + 20 - 5 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 5; - func.emit_b(OpCode::LoadI, 0, 10, 1); // r0 = 10 - func.emit_b(OpCode::LoadI, 1, 20, 1); // r1 = 20 - func.emit_a(OpCode::Add, 2, 0, 1, 1); // r2 = r0 + r1 (30) - func.emit_b(OpCode::LoadI, 3, 5, 1); // r3 = 5 - func.emit_a(OpCode::Sub, 4, 2, 3, 1); // r4 = r2 - r3 (25) - func.emit_a(OpCode::Return, 4, 0, 0, 1); // return r4 - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.as_int(), Some(25)); -} - -#[test] -fn test_execute_multiplication_division() { - let source = 
make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that computes (6 * 7) / 2 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 5; - func.emit_b(OpCode::LoadI, 0, 6, 1); // r0 = 6 - func.emit_b(OpCode::LoadI, 1, 7, 1); // r1 = 7 - func.emit_a(OpCode::Mul, 2, 0, 1, 1); // r2 = r0 * r1 (42) - func.emit_b(OpCode::LoadI, 3, 2, 1); // r3 = 2 - func.emit_a(OpCode::Div, 4, 2, 3, 1); // r4 = r2 / r3 (21) - func.emit_a(OpCode::Return, 4, 0, 0, 1); // return r4 - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.as_int(), Some(21)); -} - -#[test] -fn test_execute_modulo() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that computes 17 % 5 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 17, 1); // r0 = 17 - func.emit_b(OpCode::LoadI, 1, 5, 1); // r1 = 5 - func.emit_a(OpCode::Mod, 2, 0, 1, 1); // r2 = r0 % r1 (2) - func.emit_a(OpCode::Return, 2, 0, 0, 1); // return r2 - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.as_int(), Some(2)); -} - -#[test] -fn test_execute_negation() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that computes -42 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 - func.emit_a(OpCode::Neg, 1, 0, 0, 1); // r1 = -r0 - func.emit_a(OpCode::Return, 1, 0, 0, 1); // return r1 - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - - assert_eq!(result.as_int(), Some(-42)); -} - 
-#[test] -fn test_execute_division_by_zero() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that divides by zero - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 10, 1); // r0 = 10 - func.emit_b(OpCode::LoadI, 1, 0, 1); // r1 = 0 - func.emit_a(OpCode::Div, 2, 0, 1, 1); // r2 = r0 / r1 (error!) - func.emit_a(OpCode::Return, 2, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - assert!(result.is_err()); - if let Err(e) = result { - assert!(matches!(e.kind, RuntimeErrorKind::DivisionByZero)); - } -} - -#[test] -fn test_execute_comparison_operators() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test: 10 < 20 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 10, 1); // r0 = 10 - func.emit_b(OpCode::LoadI, 1, 20, 1); // r1 = 20 - func.emit_a(OpCode::Lt, 2, 0, 1, 1); // r2 = r0 < r1 - func.emit_a(OpCode::Return, 2, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_execute_equality() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test: 42 == 42 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 - func.emit_b(OpCode::LoadI, 1, 42, 1); // r1 = 42 - func.emit_a(OpCode::Eq, 2, 0, 1, 1); // r2 = r0 == r1 - func.emit_a(OpCode::Return, 2, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_bool(), 
Some(true)); -} - -#[test] -fn test_execute_not_equal() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test: 10 != 20 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_b(OpCode::LoadI, 0, 10, 1); // r0 = 10 - func.emit_b(OpCode::LoadI, 1, 20, 1); // r1 = 20 - func.emit_a(OpCode::Ne, 2, 0, 1, 1); // r2 = r0 != r1 - func.emit_a(OpCode::Return, 2, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_execute_logical_not() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test: !true - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_a(OpCode::LoadBool, 0, 1, 0, 1); // r0 = true - func.emit_a(OpCode::Not, 1, 0, 0, 1); // r1 = !r0 - func.emit_a(OpCode::Return, 1, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_bool(), Some(false)); -} - -#[test] -fn test_execute_jump() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test unconditional jump: skip setting r0 to 99 - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - func.emit_b(OpCode::Jump, 0, 2, 1); // jump forward 2 instructions - func.emit_b(OpCode::LoadI, 0, 99, 1); // r0 = 99 (skipped) - func.emit_b(OpCode::LoadI, 0, 99, 1); // r0 = 99 (skipped) - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 (executed) - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn 
test_execute_jump_if() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test conditional jump: if true, jump - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_a(OpCode::LoadBool, 0, 1, 0, 1); // r0 = true - func.emit_b(OpCode::JumpIf, 0, 1, 1); // if r0, jump forward 1 - func.emit_b(OpCode::LoadI, 1, 99, 1); // r1 = 99 (skipped) - func.emit_b(OpCode::LoadI, 1, 42, 1); // r1 = 42 (executed) - func.emit_a(OpCode::Return, 1, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_execute_jump_if_not() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test conditional jump: if not false, jump - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_a(OpCode::LoadBool, 0, 0, 0, 1); // r0 = false - func.emit_b(OpCode::JumpIfNot, 0, 1, 1); // if !r0, jump forward 1 - func.emit_b(OpCode::LoadI, 1, 99, 1); // r1 = 99 (skipped) - func.emit_b(OpCode::LoadI, 1, 42, 1); // r1 = 42 (executed) - func.emit_a(OpCode::Return, 1, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_execute_load_constant() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test loading a constant - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - let k = func.add_constant(Value::int(12345)); - func.emit_a(OpCode::LoadK, 0, k as u8, 0, 1); // r0 = constants[k] - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = 
vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(12345)); -} - -#[test] -fn test_execute_load_null() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - func.emit_a(OpCode::LoadNull, 0, 0, 0, 1); - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert!(result.is_null()); -} - -#[test] -fn test_execute_load_bool() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - func.emit_a(OpCode::LoadBool, 0, 1, 0, 1); // r0 = true - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_bool(), Some(true)); -} - -#[test] -fn test_execute_move() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 - func.emit_a(OpCode::Move, 1, 0, 0, 1); // r1 = r0 - func.emit_a(OpCode::Return, 1, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(42)); -} - -#[test] -fn test_execute_global_variables() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test setting and getting global variables - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 2; - - // Create a string constant for the variable name - let name_str = vm.heap_mut().intern_string("myvar"); - let k = 
func.add_constant(Value::ptr(name_str.index())); - - func.emit_b(OpCode::LoadI, 0, 123, 1); // r0 = 123 - func.emit_a(OpCode::SetGlobal, 0, k as u8, 0, 1); // myvar = r0 - func.emit_a(OpCode::GetGlobal, 1, k as u8, 0, 1); // r1 = myvar - func.emit_a(OpCode::Return, 1, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(123)); -} - -#[test] -fn test_execute_native_function_call() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Define a native function that adds two numbers - fn add_native(_vm: &mut VM, args: &[Value]) -> Result { - let a = args[0].as_int().unwrap_or(0); - let b = args[1].as_int().unwrap_or(0); - Ok(Value::int(a + b)) - } - - // Allocate native function - let native_ref = vm.alloc_native("add", 2, add_native).unwrap(); - - // Create bytecode function that calls the native function - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 5; - - // Add the native function as a constant first - let k = func.add_constant(Value::ptr(native_ref.index())); - - func.emit_a(OpCode::LoadK, 0, k as u8, 0, 1); // r0 = native function - func.emit_b(OpCode::LoadI, 1, 10, 1); // r1 = 10 (arg1) - func.emit_b(OpCode::LoadI, 2, 20, 1); // r2 = 20 (arg2) - func.emit_c(OpCode::Call, 3, 0, 2, 1); // r3 = r0(r1, r2) - func.emit_a(OpCode::Return, 3, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(30)); -} - -#[test] -fn test_execute_return0() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test Return0 (returns null) - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - func.emit_b(OpCode::LoadI, 0, 42, 1); // r0 = 42 - 
func.emit_a(OpCode::Return0, 0, 0, 0, 1); // return null (ignore r0) - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert!(result.is_null()); -} - -#[test] -fn test_execute_no_gc_control() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Test EnterNoGc and ExitNoGc - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 1; - func.emit_a(OpCode::EnterNoGc, 0, 0, 0, 1); - func.emit_a(OpCode::ExitNoGc, 0, 0, 0, 1); - func.emit_b(OpCode::LoadI, 0, 42, 1); - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref).unwrap(); - assert_eq!(result.as_int(), Some(42)); - assert!(!vm.is_in_no_gc()); -} - -#[test] -fn test_type_error_add_incompatible() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Try to add null + int - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.emit_a(OpCode::LoadNull, 0, 0, 0, 1); - func.emit_b(OpCode::LoadI, 1, 10, 1); - func.emit_a(OpCode::Add, 2, 0, 1, 1); - func.emit_a(OpCode::Return, 2, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - assert!(result.is_err()); -} - -#[test] -fn test_globals_survive_gc() { - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Set a global value - vm.set_global("test_var".to_string(), Value::int(42)); - - // Force a GC collection - vm.collect(); - - // Globals should survive GC and remain accessible - let value = vm.get_global("test_var"); - assert!(value.is_some()); - assert_eq!(value.unwrap().as_int(), Some(42)); -} - -#[test] -fn test_callglobal_native_function() { - // Test that CallGlobal works 
with native functions (like type, alloc) - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a simple function that calls type(42) using CallGlobal - let mut func = Function::new(Some("main".to_string()), 0); - func.num_registers = 3; - func.global_layout = GlobalLayout::new(vec!["type".to_string()]); - - // r1 = 42, then r0 = type(r1) - func.emit_b(OpCode::LoadI, 1, 42, 1); - // CallGlobal r0, 0 (global_idx=0=type), 1 (nargs=1) - // Arguments start at r0+1=r1 - func.emit_a(OpCode::CallGlobal, 0, 0, 1, 1); - func.push_raw(0); // cache_word_1 - func.push_raw(0); // cache_word_2 with slot_id=0 - func.emit_a(OpCode::Return, 0, 0, 0, 1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - let result = vm.execute(func_ref); - - // type(42) returns a string "int" - assert!(result.is_ok()); - let value = result.unwrap(); - assert!( - value.is_ptr(), - "type() should return a string (ptr), got {:?}", - value - ); -} - -#[test] -fn test_callglobal_user_defined_function() { - // Test that CallGlobal works with user-defined functions - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a simple "add" function that adds two numbers - let mut add_func = Function::new(Some("add".to_string()), 2); - add_func.num_registers = 3; - add_func.emit_a(OpCode::Add, 2, 0, 1, 1); // r2 = r0 + r1 - add_func.emit_a(OpCode::Return, 2, 0, 0, 1); - - add_func.finalize_bytecode(); - add_func.finalize_bytecode(); - let add_func_ref = vm.alloc_function(add_func).unwrap(); - - // Create a main function that calls add(10, 20) using CallGlobal - let mut main_func = Function::new(Some("main".to_string()), 0); - main_func.num_registers = 4; - main_func.global_layout = GlobalLayout::new(vec!["add".to_string()]); - - // Store add function in globals_by_index and globals hashmap - vm.set_global_by_index(0, Value::ptr(add_func_ref.index())); - 
vm.set_global("add".to_string(), Value::ptr(add_func_ref.index())); - - // Setup arguments: r1 = 10, r2 = 20 - main_func.emit_b(OpCode::LoadI, 1, 10, 1); // r1 = 10 - main_func.emit_b(OpCode::LoadI, 2, 20, 1); // r2 = 20 - // CallGlobal r0, 0 (global_idx=0=add), 2 (nargs=2) - main_func.emit_a(OpCode::CallGlobal, 0, 0, 2, 1); - main_func.push_raw(0); // cache_word_1 - main_func.push_raw(0); // cache_word_2 with slot_id=0 - main_func.emit_a(OpCode::Return, 0, 0, 0, 1); - - main_func.finalize_bytecode(); - main_func.finalize_bytecode(); - let main_func_ref = vm.alloc_function(main_func).unwrap(); - let result = vm.execute(main_func_ref); - - assert!(result.is_ok()); - let value = result.unwrap(); - assert_eq!(value.as_int(), Some(30), "add(10, 20) should return 30"); -} - -#[test] -fn test_callglobal_recursive_function() { - // Test that CallGlobal works with recursive functions (inline cache hit) - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a recursive factorial function - // fn fact(n) { if n < 2 { return 1 } return n * fact(n - 1) } - let mut fact_func = Function::new(Some("fact".to_string()), 1); - fact_func.num_registers = 5; - fact_func.global_layout = GlobalLayout::new(vec!["fact".to_string()]); - - // if n < 2 - fact_func.emit_b(OpCode::LoadI, 1, 2, 1); // r1 = 2 - fact_func.emit_a(OpCode::Lt, 2, 0, 1, 1); // r2 = r0 < r1 (n < 2) - fact_func.emit_b(OpCode::JumpIfNot, 2, 2, 1); // if not (n < 2), skip 2 instructions - - // return 1 - fact_func.emit_b(OpCode::LoadI, 3, 1, 1); // r3 = 1 - fact_func.emit_a(OpCode::Return, 3, 0, 0, 1); // return 1 - - // return n * fact(n - 1) - fact_func.emit_a(OpCode::SubI, 2, 0, 1, 1); // r2 = n - 1 - fact_func.emit_a(OpCode::CallGlobal, 3, 0, 1, 1); // r3 = fact(r2) - args at r3+1=r4, so we need to put r2 in the right place - - // Actually, let me redo this more carefully - // For CallGlobal r3, 0, 1: result in r3, global_idx=0, nargs=1 - // Arguments should be at r3+1 = r4 - // So 
we need to load r4 = n - 1 - let mut fact_func = Function::new(Some("fact".to_string()), 1); - fact_func.num_registers = 6; - fact_func.global_layout = GlobalLayout::new(vec!["fact".to_string()]); - - // if n < 2 - fact_func.emit_b(OpCode::LoadI, 1, 2, 1); // r1 = 2 - fact_func.emit_a(OpCode::Lt, 2, 0, 1, 1); // r2 = r0 < r1 (n < 2) - fact_func.emit_b(OpCode::JumpIfNot, 2, 2, 1); // if not (n < 2), skip 2 instructions - - // return 1 - fact_func.emit_b(OpCode::LoadI, 3, 1, 1); // r3 = 1 - fact_func.emit_a(OpCode::Return, 3, 0, 0, 1); // return 1 - - // return n * fact(n - 1) - // For CallGlobal with dest=3, args must be at r4 - fact_func.emit_a(OpCode::SubI, 4, 0, 1, 1); // r4 = n - 1 - fact_func.emit_a(OpCode::CallGlobal, 3, 0, 1, 1); // r3 = fact(r4) where args at r3+1=r4 - fact_func.push_raw(0); // cache_word_1 - fact_func.push_raw(0); // cache_word_2 with slot_id=0 - fact_func.emit_a(OpCode::Mul, 5, 0, 3, 1); // r5 = n * r3 - fact_func.emit_a(OpCode::Return, 5, 0, 0, 1); // return r5 - - fact_func.finalize_bytecode(); - let fact_func_ref = vm.alloc_function(fact_func).unwrap(); - vm.set_global_by_index(0, Value::ptr(fact_func_ref.index())); - vm.set_global("fact".to_string(), Value::ptr(fact_func_ref.index())); - - // Create main function that calls fact(5) - let mut main_func = Function::new(Some("main".to_string()), 0); - main_func.num_registers = 3; - main_func.global_layout = GlobalLayout::new(vec!["fact".to_string()]); - - main_func.emit_b(OpCode::LoadI, 1, 5, 1); // r1 = 5 (argument at dest+1=r0+1=r1) - main_func.emit_a(OpCode::CallGlobal, 0, 0, 1, 1); // r0 = fact(5) - main_func.push_raw(0); // cache_word_1 - main_func.push_raw(1); // cache_word_2 with slot_id=1 (different from fact's slot) - main_func.emit_a(OpCode::Return, 0, 0, 0, 1); - - main_func.finalize_bytecode(); - let main_func_ref = vm.alloc_function(main_func).unwrap(); - let result = vm.execute(main_func_ref); - - assert!(result.is_ok()); - let value = result.unwrap(); - 
assert_eq!(value.as_int(), Some(120), "fact(5) should return 120"); -} - -#[test] -fn test_callglobal_arity_mismatch() { - // Test that CallGlobal properly reports arity mismatches - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function that takes 2 arguments - let mut add_func = Function::new(Some("add".to_string()), 2); - add_func.num_registers = 3; - add_func.emit_a(OpCode::Add, 2, 0, 1, 1); - add_func.emit_a(OpCode::Return, 2, 0, 0, 1); - - add_func.finalize_bytecode(); - let add_func_ref = vm.alloc_function(add_func).unwrap(); - vm.set_global_by_index(0, Value::ptr(add_func_ref.index())); - vm.set_global("add".to_string(), Value::ptr(add_func_ref.index())); - - // Create main function that calls add with wrong number of args - let mut main_func = Function::new(Some("main".to_string()), 0); - main_func.num_registers = 3; - main_func.global_layout = GlobalLayout::new(vec!["add".to_string()]); - - main_func.emit_b(OpCode::LoadI, 1, 10, 1); // Only provide 1 arg - main_func.emit_a(OpCode::CallGlobal, 0, 0, 1, 1); // Call with nargs=1, but add expects 2 - main_func.push_raw(0); // cache_word_1 - main_func.push_raw(0); // cache_word_2 with slot_id=0 - main_func.emit_a(OpCode::Return, 0, 0, 0, 1); - - main_func.finalize_bytecode(); - let main_func_ref = vm.alloc_function(main_func).unwrap(); - let result = vm.execute(main_func_ref); - - assert!(result.is_err(), "Should fail with arity mismatch"); - if let Err(err) = result { - assert!(matches!( - err.kind, - RuntimeErrorKind::ArityMismatch { - expected: 2, - got: 1 - } - )); - } -} - -#[test] -fn test_callglobal_cache_invalidation_on_gc() { - // Test that the CallGlobal cache is properly invalidated after GC - let source = make_test_source(); - let mut vm = VM::new(source).unwrap(); - - // Create a function - let mut func = Function::new(Some("test".to_string()), 0); - func.num_registers = 1; - func.emit_b(OpCode::LoadI, 0, 42, 1); - func.emit_a(OpCode::Return, 0, 0, 0, 
1); - - func.finalize_bytecode(); - func.finalize_bytecode(); - let func_ref = vm.alloc_function(func).unwrap(); - // Store in both globals HashMap (for GC roots) and globals_by_index - vm.set_global("test".to_string(), Value::ptr(func_ref.index())); - vm.set_global_by_index(0, Value::ptr(func_ref.index())); - - // Create main that calls the function - let mut main_func = Function::new(Some("main".to_string()), 0); - main_func.num_registers = 2; - main_func.global_layout = GlobalLayout::new(vec!["test".to_string()]); - - main_func.emit_a(OpCode::CallGlobal, 0, 0, 0, 1); - main_func.push_raw(0); // cache_word_1 - main_func.push_raw(0); // cache_word_2 with slot_id=0 - main_func.emit_a(OpCode::Return, 0, 0, 0, 1); - - main_func.finalize_bytecode(); - let main_func_ref = vm.alloc_function(main_func).unwrap(); - - // Execute once to populate cache - let result1 = vm.execute(main_func_ref); - assert!(result1.is_ok()); - assert_eq!(result1.unwrap().as_int(), Some(42)); - - // Force GC - this should clear the call_global_cache - vm.collect(); - - // Execute again - should work even after cache invalidation - // The cache should be re-populated on the next call - let mut main_func2 = Function::new(Some("main".to_string()), 0); - main_func2.num_registers = 2; - main_func2.global_layout = GlobalLayout::new(vec!["test".to_string()]); - main_func2.emit_a(OpCode::CallGlobal, 0, 0, 0, 1); - main_func2.push_raw(0); // cache_word_1 - main_func2.push_raw(1); // cache_word_2 with slot_id=1 - main_func2.emit_a(OpCode::Return, 0, 0, 0, 1); - main_func2.finalize_bytecode(); - let main_func_ref2 = vm.alloc_function(main_func2).unwrap(); - - let result2 = vm.execute(main_func_ref2); - assert!( - result2.is_ok(), - "Execution should succeed after GC: {:?}", - result2 - ); - assert_eq!(result2.unwrap().as_int(), Some(42)); -} diff --git a/air/src/layout.rs b/air/src/layout.rs index 3af6b02..a6ea97d 100644 --- a/air/src/layout.rs +++ b/air/src/layout.rs @@ -1,4 +1,4 @@ -use 
crate::{AirProgram, AirStructDef, AirType}; +use crate::{AirEnumDef, AirProgram, AirStructDef, AirType}; use std::collections::{HashMap, HashSet}; #[derive(Debug, Clone, Copy)] @@ -13,10 +13,13 @@ pub fn layout_of(ty: &AirType) -> TypeLayout { AirType::I16 | AirType::U16 => TypeLayout { size: 2, align: 2 }, AirType::I32 | AirType::U32 | AirType::F32 => TypeLayout { size: 4, align: 4 }, AirType::I64 | AirType::U64 | AirType::F64 => TypeLayout { size: 8, align: 8 }, - AirType::Ptr(_) | AirType::Str | AirType::FnPtr { .. } => TypeLayout { size: 8, align: 8 }, + AirType::Ptr(_) => TypeLayout { size: 8, align: 8 }, + // Aelys function values are fat pointers { fn_ptr: ptr, env_ptr: ptr } = 16 bytes. + AirType::FnPtr { .. } => TypeLayout { size: 16, align: 8 }, + AirType::Str => TypeLayout { size: 16, align: 8 }, AirType::Void => TypeLayout { size: 0, align: 1 }, AirType::Slice(_) => TypeLayout { size: 16, align: 8 }, - AirType::Param(_) => TypeLayout { size: 8, align: 8 }, + AirType::Param(_) | AirType::Opaque => TypeLayout { size: 8, align: 8 }, AirType::Array(inner, n) => { let el = layout_of(inner); TypeLayout { @@ -24,13 +27,16 @@ pub fn layout_of(ty: &AirType) -> TypeLayout { align: el.align, } } + // Simple enum layout (tag only). Data enums use their registered LLVM struct + // type in codegen, so this is only used for simple enums without data variants. 
+ AirType::Enum(_) => TypeLayout { size: 4, align: 4 }, AirType::Struct(name) => { panic!("layout_of: Struct({name}) requires program context; run compute_layouts first") } } } -pub fn compute_layouts(program: &mut AirProgram) { +pub fn compute_layouts(program: &mut AirProgram) -> Vec { let name_to_idx: HashMap = program .structs .iter() @@ -38,27 +44,148 @@ pub fn compute_layouts(program: &mut AirProgram) { .map(|(i, s)| (s.name.clone(), i)) .collect(); - detect_self_references(&program.structs); - let order = topological_order(&program.structs, &name_to_idx); + let errors = detect_self_references(&program.structs); + if !errors.is_empty() { + return errors; + } + let order = match topological_order(&program.structs, &name_to_idx) { + Ok(o) => o, + Err(e) => return vec![e], + }; let mut resolved: HashMap = HashMap::new(); + let mut remaining_enums: Vec = (0..program.enums.len()).collect(); + let mut remaining_structs: Vec = order; + let max_iterations = remaining_enums.len() + remaining_structs.len() + 1; - for idx in order { - let (total, offsets) = struct_layout(&program.structs[idx], &resolved); - resolved.insert(program.structs[idx].name.clone(), total); - for (i, off) in offsets.into_iter().enumerate() { - program.structs[idx].fields[i].offset = Some(off); + for _ in 0..max_iterations { + if remaining_enums.is_empty() && remaining_structs.is_empty() { + break; } + + let mut progress = false; + + let mut next_enums = Vec::new(); + for &idx in &remaining_enums { + let def = &program.enums[idx]; + if !enum_has_data(def) { + progress |= resolved + .insert(def.name.clone(), TypeLayout { size: 4, align: 4 }) + .is_none(); + continue; + } + if !types_resolved(def.variants.iter().flat_map(|v| v.payload.iter()), &resolved) { + next_enums.push(idx); + continue; + } + + let payload_size = enum_max_payload_size(def, &resolved); + let layout = if payload_size == 0 { + TypeLayout { size: 4, align: 4 } + } else { + let payload_align = enum_max_payload_align(def, &resolved); 
+ let total_align = 4u32.max(payload_align); + let payload_offset = align_to(4, payload_align); + let total_size = align_to(payload_offset + payload_size, total_align); + TypeLayout { + size: total_size, + align: total_align, + } + }; + progress |= resolved.insert(def.name.clone(), layout).is_none(); + } + remaining_enums = next_enums; + + let mut next_structs = Vec::new(); + for &idx in &remaining_structs { + let def = &program.structs[idx]; + if !types_resolved(def.fields.iter().map(|f| &f.ty), &resolved) { + next_structs.push(idx); + continue; + } + + let (total, offsets) = struct_layout(def, &resolved); + progress |= resolved.insert(def.name.clone(), total).is_none(); + for (i, off) in offsets.into_iter().enumerate() { + program.structs[idx].fields[i].offset = Some(off); + } + } + remaining_structs = next_structs; + + if !progress { + break; + } + } + + if !remaining_enums.is_empty() || !remaining_structs.is_empty() { + let mut names = Vec::new(); + names.extend( + remaining_structs + .iter() + .map(|&idx| format!("struct {}", program.structs[idx].name)), + ); + names.extend( + remaining_enums + .iter() + .map(|&idx| format!("enum {}", program.enums[idx].name)), + ); + return vec![format!( + "recursive type cycle involving by-value enums/structs: {}", + names.join(" <-> ") + )]; } + + program.struct_sizes = resolved; + Vec::new() +} + +fn types_resolved<'a, I>(types: I, resolved: &HashMap) -> bool +where + I: IntoIterator, +{ + types.into_iter().all(|ty| type_resolved(ty, resolved)) } -fn resolved_layout(ty: &AirType, structs: &HashMap) -> TypeLayout { +fn type_resolved(ty: &AirType, resolved: &HashMap) -> bool { match ty { - AirType::Struct(name) => *structs + AirType::Struct(name) | AirType::Enum(name) => resolved.contains_key(name.as_str()), + AirType::Array(inner, _) => type_resolved(inner, resolved), + AirType::I8 + | AirType::U8 + | AirType::Bool + | AirType::I16 + | AirType::U16 + | AirType::I32 + | AirType::U32 + | AirType::F32 + | AirType::I64 + | 
AirType::U64 + | AirType::F64 + | AirType::Ptr(_) + | AirType::FnPtr { .. } + | AirType::Str + | AirType::Void + | AirType::Slice(_) + | AirType::Param(_) + | AirType::Opaque => true, + } +} + +pub fn resolved_layout(ty: &AirType, sizes: &HashMap) -> TypeLayout { + match ty { + AirType::Struct(name) => *sizes .get(name.as_str()) .unwrap_or_else(|| panic!("struct `{name}` referenced before its layout is computed")), + AirType::Enum(name) => { + // Look up pre-computed enum size. Falls back to tag-only (4 bytes) + // for simple enums that weren't added to the map. + sizes + .get(name.as_str()) + .copied() + .unwrap_or(TypeLayout { size: 4, align: 4 }) + } AirType::Array(inner, n) => { - let el = resolved_layout(inner, structs); + let el = resolved_layout(inner, sizes); TypeLayout { size: el.size * (*n as u32), align: el.align, @@ -95,17 +222,19 @@ fn align_to(offset: u32, align: u32) -> u32 { (offset + align - 1) & !(align - 1) } -fn detect_self_references(structs: &[AirStructDef]) { +fn detect_self_references(structs: &[AirStructDef]) -> Vec { + let mut errors = Vec::new(); for def in structs { for field in &def.fields { if references_by_value(&field.ty, &def.name) { - panic!( + errors.push(format!( "struct `{}` has infinite size: field `{}` contains `{}` by value", def.name, field.name, def.name - ); + )); } } } + errors } fn references_by_value(ty: &AirType, target: &str) -> bool { @@ -126,7 +255,7 @@ fn field_struct_deps(ty: &AirType, deps: &mut HashSet) { } } -fn topological_order(structs: &[AirStructDef], name_to_idx: &HashMap) -> Vec { +fn topological_order(structs: &[AirStructDef], name_to_idx: &HashMap) -> Result, String> { let n = structs.len(); let mut in_degree = vec![0u32; n]; let mut dependents: Vec> = vec![vec![]; n]; @@ -163,8 +292,48 @@ fn topological_order(structs: &[AirStructDef], name_to_idx: &HashMap 0) .map(|i| structs[i].name.as_str()) .collect(); - panic!("recursive struct cycle: {}", cycle.join(" <-> ")); + return Err(format!("recursive 
struct cycle: {}", cycle.join(" <-> "))); } - order + Ok(order) +} + +/// Returns true if the enum has any data variants (non-empty payload). +pub fn enum_has_data(def: &AirEnumDef) -> bool { + def.variants.iter().any(|v| !v.payload.is_empty()) +} + +/// Compute the max alignment needed across all payload fields of a data enum. +fn enum_max_payload_align(def: &AirEnumDef, sizes: &HashMap) -> u32 { + def.variants + .iter() + .flat_map(|v| v.payload.iter()) + .map(|ty| resolved_layout(ty, sizes).align) + .max() + .unwrap_or(1) +} + +/// Compute the max payload size in bytes across all variants of a data enum. +/// Each variant's payload is laid out with proper alignment padding between fields, +/// matching the aligned offsets that codegen uses when storing fields. +/// +/// `struct_sizes` must contain computed sizes for any struct types that appear +/// in enum variant payloads. Pass `&program.struct_sizes` after `compute_layouts`. +pub fn enum_max_payload_size( + def: &AirEnumDef, + struct_sizes: &HashMap, +) -> u32 { + def.variants + .iter() + .map(|v| { + let mut offset = 0u32; + for ty in &v.payload { + let layout = resolved_layout(ty, struct_sizes); + offset = (offset + layout.align - 1) & !(layout.align - 1); + offset += layout.size; + } + offset + }) + .max() + .unwrap_or(0) } diff --git a/air/src/lib.rs b/air/src/lib.rs index 9a22118..b49357c 100644 --- a/air/src/lib.rs +++ b/air/src/lib.rs @@ -3,6 +3,7 @@ pub mod layout; pub mod lower; pub mod mono; +pub mod passes; pub mod print; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -40,9 +41,12 @@ pub enum AirType { F32, F64, Bool, + /// Byte string slice ABI: (ptr, len), never NUL-terminated. + /// Payload may contain internal '\0' bytes. Str, Ptr(Box), Struct(String), + Enum(String), Array(Box, u64), Slice(Box), FnPtr { @@ -51,9 +55,28 @@ pub enum AirType { conv: CallingConv, }, Param(TypeParamId), + /// unresolved Dynamic type from sema. must be eliminated by monomorphization before reaching codegen. 
+ /// the validation pass rejects any Opaque that survives past the AIR pipeline + Opaque, Void, } +impl AirType { + pub fn int_size(&self) -> Option { + match self { + AirType::I8 => Some(AirIntSize::I8), + AirType::I16 => Some(AirIntSize::I16), + AirType::I32 => Some(AirIntSize::I32), + AirType::I64 => Some(AirIntSize::I64), + AirType::U8 => Some(AirIntSize::U8), + AirType::U16 => Some(AirIntSize::U16), + AirType::U32 => Some(AirIntSize::U32), + AirType::U64 => Some(AirIntSize::U64), + _ => None, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AirIntSize { I8, @@ -88,13 +111,31 @@ pub struct AirStructField { pub offset: Option, } +#[derive(Clone)] +pub struct AirEnumVariant { + pub name: String, + pub tag: u32, + pub payload: Vec, // empty = unit variant, non-empty = data variant +} + +#[derive(Clone)] +pub struct AirEnumDef { + pub name: String, + pub type_params: Vec, + pub variants: Vec, + pub span: Option, +} + #[derive(Clone)] pub struct AirProgram { pub functions: Vec, pub structs: Vec, + pub enums: Vec, pub globals: Vec, pub source_files: Vec, pub mono_instances: Vec, + /// Computed struct sizes (populated by `compute_layouts`). + pub struct_sizes: std::collections::HashMap, } #[derive(Clone)] @@ -246,7 +287,34 @@ pub enum Rvalue { from: AirType, to: AirType, }, - Discriminant(Operand), + Index { + base: Operand, + index: Operand, + }, + EnumInit { + enum_name: String, + variant: String, + tag: u32, + payload: Vec, // empty for unit variants + }, + /// Extract the i32 tag from an enum value. + EnumTag { + enum_name: String, + operand: Operand, + }, + /// Extract a payload field from an enum value by variant tag and field index. + EnumPayload { + enum_name: String, + tag: u32, + operand: Operand, + field_index: u32, + }, + /// Build a fat pointer `{ fn_ptr, env_ptr }` from a function name and + /// an environment pointer (or Null for non-capturing closures / named fns). 
+ ClosureCreate { + fn_name: String, + env: Operand, + }, } #[derive(Clone)] @@ -272,8 +340,22 @@ pub enum AirConst { Bool(bool), Str(String), Null, + /// Reference to a named function as a pointer value (for lambdas / first-class functions). + FnRef(String), + Enum { + enum_name: String, + tag: u32, + payload: Vec, + }, ZeroInit(AirType), Undef(AirType), + /// Compile-time constant array (all elements must also be constants). + Array(Vec), + /// Compile-time constant struct literal (fields in canonical declaration order). + Struct { + name: String, + fields: Vec<(String, AirConst)>, + }, } #[derive(Clone)] diff --git a/air/src/lower.rs b/air/src/lower.rs deleted file mode 100644 index a2b9631..0000000 --- a/air/src/lower.rs +++ /dev/null @@ -1,1605 +0,0 @@ -use crate::*; -use aelys_sema::{ - InferType, TypedExpr, TypedExprKind, TypedFmtStringPart, TypedFunction, TypedParam, - TypedProgram, TypedStmt, TypedStmtKind, -}; -use aelys_syntax::BinaryOp; - -pub fn lower(program: &TypedProgram) -> AirProgram { - let mut cx = LoweringContext::new(program); - cx.lower_program(); - cx.finish() -} - -pub fn lower_with_gc_mode(program: &TypedProgram, file_gc_mode: GcMode) -> AirProgram { - let mut cx = LoweringContext::new(program); - cx.file_gc_mode = file_gc_mode; - cx.lower_program(); - cx.finish() -} - -struct LoweringContext<'a> { - program: &'a TypedProgram, - functions: Vec, - structs: Vec, - globals: Vec, - source_files: Vec, - next_function_id: u32, - next_local_id: u32, - next_block_id: u32, - file_gc_mode: GcMode, - current_blocks: Vec, - current_locals: Vec, - current_params: Vec, - current_stmts: Vec, - locals_by_name: Vec<(String, LocalId)>, - loop_stack: Vec, - type_params_map: Vec<(String, TypeParamId)>, - pending_block_id: Option, - block_aliases: Vec<(u32, u32)>, -} - -struct LoopBlocks { - header: BlockId, - exit: BlockId, -} - -impl<'a> LoweringContext<'a> { - fn new(program: &'a TypedProgram) -> Self { - Self { - program, - functions: Vec::new(), - 
structs: Vec::new(), - globals: Vec::new(), - source_files: vec![program.source.name.clone()], - next_function_id: 0, - next_local_id: 0, - next_block_id: 0, - file_gc_mode: GcMode::Managed, - current_blocks: Vec::new(), - current_locals: Vec::new(), - current_params: Vec::new(), - current_stmts: Vec::new(), - locals_by_name: Vec::new(), - loop_stack: Vec::new(), - type_params_map: Vec::new(), - pending_block_id: None, - block_aliases: Vec::new(), - } - } - - fn finish(self) -> AirProgram { - AirProgram { - functions: self.functions, - structs: self.structs, - globals: self.globals, - source_files: self.source_files, - mono_instances: Vec::new(), - } - } - - fn alloc_function_id(&mut self) -> FunctionId { - let id = FunctionId(self.next_function_id); - self.next_function_id += 1; - id - } - - fn alloc_local_id(&mut self) -> LocalId { - let id = LocalId(self.next_local_id); - self.next_local_id += 1; - id - } - - fn alloc_block_id(&mut self) -> BlockId { - let id = BlockId(self.next_block_id); - self.next_block_id += 1; - id - } - - fn alloc_temp(&mut self, ty: AirType) -> LocalId { - let id = self.alloc_local_id(); - self.current_locals.push(AirLocal { - id, - ty, - name: None, - is_mut: false, - span: None, - }); - id - } - - fn alloc_named_local( - &mut self, - name: &str, - ty: AirType, - is_mut: bool, - span: Option, - ) -> LocalId { - let id = self.alloc_local_id(); - self.current_locals.push(AirLocal { - id, - ty, - name: Some(name.to_string()), - is_mut, - span, - }); - self.locals_by_name.push((name.to_string(), id)); - id - } - - fn lookup_local(&self, name: &str) -> Option { - self.locals_by_name - .iter() - .rev() - .find(|(n, _)| n == name) - .map(|(_, id)| *id) - } - - fn emit(&mut self, kind: AirStmtKind, span: Option) { - self.current_stmts.push(AirStmt { kind, span }); - } - - fn seal_block(&mut self, terminator: AirTerminator) -> BlockId { - let id = self - .pending_block_id - .take() - .unwrap_or_else(|| self.alloc_block_id()); - 
self.current_blocks.push(AirBlock { - id, - stmts: std::mem::take(&mut self.current_stmts), - terminator, - }); - id - } - - fn span(&self, s: &aelys_syntax::Span) -> Span { - Span { - file: 0, - lo: s.start as u32, - hi: s.end as u32, - } - } - - fn lower_type_params(&mut self, type_params: &[String]) -> Vec { - type_params - .iter() - .enumerate() - .map(|(i, name)| { - let id = TypeParamId(i as u32); - self.type_params_map.push((name.clone(), id)); - id - }) - .collect() - } - - fn lower_type_from_infer(&self, ty: &InferType) -> AirType { - match ty { - InferType::I8 => AirType::I8, - InferType::I16 => AirType::I16, - InferType::I32 => AirType::I32, - InferType::I64 => AirType::I64, - InferType::U8 => AirType::U8, - InferType::U16 => AirType::U16, - InferType::U32 => AirType::U32, - InferType::U64 => AirType::U64, - InferType::F32 => AirType::F32, - InferType::F64 => AirType::F64, - InferType::Bool => AirType::Bool, - InferType::String => AirType::Str, - InferType::Null => AirType::Void, - InferType::Function { params, ret } => AirType::FnPtr { - params: params - .iter() - .map(|p| self.lower_type_from_infer(p)) - .collect(), - ret: Box::new(self.lower_type_from_infer(ret)), - conv: CallingConv::Aelys, - }, - InferType::Array(inner) => AirType::Slice(Box::new(self.lower_type_from_infer(inner))), - InferType::Vec(inner) => AirType::Slice(Box::new(self.lower_type_from_infer(inner))), - InferType::Tuple(_) => AirType::Void, - InferType::Range => AirType::Void, - InferType::Struct(name) => { - if let Some((_, id)) = self.type_params_map.iter().find(|(n, _)| n == name) { - AirType::Param(*id) - } else { - AirType::Struct(name.clone()) - } - } - InferType::Var(_) | InferType::Dynamic => AirType::I64, - } - } - - fn gc_mode_for_function(&self, func: &TypedFunction) -> GcMode { - if func.decorators.iter().any(|d| d.name == "no_gc") { - GcMode::Manual - } else { - self.file_gc_mode - } - } - - // ======================================================================== - 
// Program - // ======================================================================== - - fn lower_program(&mut self) { - for stmt in &self.program.stmts { - if let TypedStmtKind::StructDecl { - name, - type_params, - fields, - } = &stmt.kind - { - self.lower_struct_decl(name, type_params, fields, &stmt.span); - } - } - - let stmts: Vec<_> = self.program.stmts.clone(); - for stmt in &stmts { - match &stmt.kind { - TypedStmtKind::Function(func) => self.lower_function(func), - TypedStmtKind::StructDecl { .. } => {} - _ => self.lower_toplevel_stmt(stmt), - } - } - } - - fn lower_struct_decl( - &mut self, - name: &str, - type_params: &[String], - fields: &[(String, InferType)], - span: &aelys_syntax::Span, - ) { - let air_type_params = self.lower_type_params(type_params); - let air_fields = fields - .iter() - .map(|(fname, fty)| AirStructField { - name: fname.clone(), - ty: self.lower_type_from_infer(fty), - offset: None, - }) - .collect(); - self.structs.push(AirStructDef { - name: name.to_string(), - type_params: air_type_params, - fields: air_fields, - is_closure_env: false, - span: Some(self.span(span)), - }); - self.type_params_map.clear(); - } - - // ======================================================================== - // Functions - // ======================================================================== - - fn lower_function(&mut self, func: &TypedFunction) { - let saved_locals = std::mem::take(&mut self.current_locals); - let saved_params = std::mem::take(&mut self.current_params); - let saved_blocks = std::mem::take(&mut self.current_blocks); - let saved_stmts = std::mem::take(&mut self.current_stmts); - let saved_names = std::mem::take(&mut self.locals_by_name); - let saved_aliases = std::mem::take(&mut self.block_aliases); - let saved_pending = self.pending_block_id.take(); - let saved_next_local = self.next_local_id; - let saved_next_block = self.next_block_id; - self.next_local_id = 0; - self.next_block_id = 0; - - let func_id = 
self.alloc_function_id(); - let gc_mode = self.gc_mode_for_function(func); - - if !func.captures.is_empty() { - self.lower_closure(func, func_id, gc_mode); - } else { - self.lower_plain_function(func, func_id, gc_mode); - } - - self.current_locals = saved_locals; - self.current_params = saved_params; - self.current_blocks = saved_blocks; - self.current_stmts = saved_stmts; - self.locals_by_name = saved_names; - self.block_aliases = saved_aliases; - self.pending_block_id = saved_pending; - self.next_local_id = saved_next_local; - self.next_block_id = saved_next_block; - } - - fn lower_plain_function(&mut self, func: &TypedFunction, func_id: FunctionId, gc_mode: GcMode) { - let type_params = self.lower_type_params(&func.type_params); - let params = self.lower_params(&func.params); - let ret_ty = self.lower_type_from_infer(&func.return_type); - - self.lower_body(&func.body); - self.finalize_function_body(); - self.resolve_block_aliases(); - - let air_func = AirFunction { - id: func_id, - name: func.name.clone(), - gc_mode, - type_params, - params, - ret_ty, - locals: std::mem::take(&mut self.current_locals), - blocks: std::mem::take(&mut self.current_blocks), - is_extern: false, - calling_conv: CallingConv::Aelys, - attributes: self.func_attribs(func), - span: Some(self.span(&func.span)), - }; - self.functions.push(air_func); - self.type_params_map.clear(); - } - - fn lower_closure(&mut self, func: &TypedFunction, func_id: FunctionId, gc_mode: GcMode) { - let type_params = self.lower_type_params(&func.type_params); - - let env_name = format!("__closure_env_{}", func.name); - let env_fields: Vec = func - .captures - .iter() - .map(|(name, ty)| AirStructField { - name: name.clone(), - ty: self.lower_type_from_infer(ty), - offset: None, - }) - .collect(); - - self.structs.push(AirStructDef { - name: env_name.clone(), - type_params: Vec::new(), - fields: env_fields, - is_closure_env: true, - span: Some(self.span(&func.span)), - }); - - let env_param_id = 
self.alloc_local_id(); - let env_ty = AirType::Ptr(Box::new(AirType::Struct(env_name.clone()))); - self.current_params.push(AirParam { - id: env_param_id, - ty: env_ty.clone(), - name: "__env".to_string(), - span: Some(self.span(&func.span)), - }); - - for (cap_name, cap_ty) in &func.captures { - let local_id = - self.alloc_named_local(cap_name, self.lower_type_from_infer(cap_ty), false, None); - self.emit( - AirStmtKind::Assign { - place: Place::Local(local_id), - rvalue: Rvalue::FieldAccess { - base: Operand::Copy(env_param_id), - field: cap_name.clone(), - }, - }, - None, - ); - } - - let user_params = self.lower_params(&func.params); - let ret_ty = self.lower_type_from_infer(&func.return_type); - - self.lower_body(&func.body); - self.finalize_function_body(); - self.resolve_block_aliases(); - - let mut all_params = vec![self.current_params.remove(0)]; - all_params.extend(user_params); - - let air_func = AirFunction { - id: func_id, - name: func.name.clone(), - gc_mode, - type_params, - params: all_params, - ret_ty, - locals: std::mem::take(&mut self.current_locals), - blocks: std::mem::take(&mut self.current_blocks), - is_extern: false, - calling_conv: CallingConv::Aelys, - attributes: self.func_attribs(func), - span: Some(self.span(&func.span)), - }; - self.functions.push(air_func); - self.type_params_map.clear(); - } - - fn lower_params(&mut self, params: &[TypedParam]) -> Vec { - params - .iter() - .map(|p| { - let ty = self.lower_type_from_infer(&p.ty); - let id = self.alloc_named_local( - &p.name, - ty.clone(), - p.mutable, - Some(self.span(&p.span)), - ); - AirParam { - id, - ty, - name: p.name.clone(), - span: Some(self.span(&p.span)), - } - }) - .collect() - } - - fn func_attribs(&self, func: &TypedFunction) -> FunctionAttribs { - let inline = if func.decorators.iter().any(|d| d.name == "inline_always") { - InlineHint::Always - } else if func.decorators.iter().any(|d| d.name == "inline_never") { - InlineHint::Never - } else { - InlineHint::Default - }; 
- FunctionAttribs { - inline, - no_gc: func.decorators.iter().any(|d| d.name == "no_gc"), - no_unwind: false, - cold: func.decorators.iter().any(|d| d.name == "cold"), - } - } - - // ======================================================================== - // Top-level statements (global initializers) - // ======================================================================== - - fn lower_toplevel_stmt(&mut self, stmt: &TypedStmt) { - if let TypedStmtKind::Let { - name, - initializer, - var_type, - .. - } = &stmt.kind - { - let ty = self.lower_type_from_infer(var_type); - let init = self.try_const_expr(initializer); - self.globals.push(AirGlobal { - name: name.clone(), - ty, - init, - gc_mode: self.file_gc_mode, - span: Some(self.span(&stmt.span)), - }); - } - } - - fn try_const_expr(&self, expr: &TypedExpr) -> Option { - match &expr.kind { - TypedExprKind::Int(v) => { - if expr.ty.is_integer() { - Some(AirConst::Int(*v, infer_to_int_size(&expr.ty))) - } else { - Some(AirConst::IntLiteral(*v)) - } - } - TypedExprKind::Float(v) => { - let size = if matches!(expr.ty, InferType::F32) { - AirFloatSize::F32 - } else { - AirFloatSize::F64 - }; - Some(AirConst::Float(*v, size)) - } - TypedExprKind::Bool(v) => Some(AirConst::Bool(*v)), - TypedExprKind::String(v) => Some(AirConst::Str(v.clone())), - TypedExprKind::Null => Some(AirConst::Null), - _ => None, - } - } - - // ======================================================================== - // Body lowering - // ======================================================================== - - fn lower_body(&mut self, stmts: &[TypedStmt]) { - for stmt in stmts { - self.lower_stmt(stmt); - } - } - - fn finalize_function_body(&mut self) { - if (self.current_stmts.is_empty() && self.current_blocks.is_empty()) - || !self.current_stmts.is_empty() - { - self.seal_block(AirTerminator::Return(None)); - } - } - - fn lower_stmt(&mut self, stmt: &TypedStmt) { - let sp = Some(self.span(&stmt.span)); - match &stmt.kind { - 
TypedStmtKind::Expression(expr) => { - self.lower_expr_discard(expr); - } - TypedStmtKind::Let { - name, - mutable, - initializer, - var_type, - .. - } => { - let ty = self.lower_type_from_infer(var_type); - let local = self.alloc_named_local(name, ty, *mutable, sp); - let operand = self.lower_expr(initializer); - self.emit( - AirStmtKind::Assign { - place: Place::Local(local), - rvalue: Rvalue::Use(operand), - }, - sp, - ); - } - TypedStmtKind::Block(stmts) => { - self.lower_body(stmts); - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } => { - self.lower_if(condition, then_branch, else_branch.as_deref(), sp); - } - TypedStmtKind::While { condition, body } => { - self.lower_while(condition, body, sp); - } - TypedStmtKind::For { - iterator, - start, - end, - inclusive, - step, - body, - } => { - self.lower_for(iterator, start, end, *inclusive, step.as_ref(), body); - } - TypedStmtKind::ForEach { - iterator, - iterable, - elem_type, - body, - } => { - self.lower_foreach(iterator, iterable, elem_type, body, sp); - } - TypedStmtKind::Return(val) => { - let operand = val.as_ref().map(|e| self.lower_expr(e)); - self.seal_block(AirTerminator::Return(operand)); - } - TypedStmtKind::Break => { - if let Some(loop_ctx) = self.loop_stack.last() { - let exit = loop_ctx.exit; - self.seal_block(AirTerminator::Goto(exit)); - } - } - TypedStmtKind::Continue => { - if let Some(loop_ctx) = self.loop_stack.last() { - let header = loop_ctx.header; - self.seal_block(AirTerminator::Goto(header)); - } - } - TypedStmtKind::Function(func) => { - self.lower_function(func); - } - TypedStmtKind::Needs(_) | TypedStmtKind::StructDecl { .. 
} => {} - } - } - - // control flow desuccrage - fn lower_if( - &mut self, - condition: &TypedExpr, - then_branch: &TypedStmt, - else_branch: Option<&TypedStmt>, - _sp: Option, - ) { - let cond = self.lower_expr(condition); - let then_id = self.alloc_block_id(); - let else_id = self.alloc_block_id(); - let merge_id = self.alloc_block_id(); - - self.seal_block(AirTerminator::Branch { - cond, - then_block: then_id, - else_block: if else_branch.is_some() { - else_id - } else { - merge_id - }, - }); - - self.lower_stmt(then_branch); - if !self.last_block_is_terminated() { - self.seal_block(AirTerminator::Goto(merge_id)); - } - self.fixup_block_id(then_id); - - if let Some(else_br) = else_branch { - self.lower_stmt(else_br); - if !self.last_block_is_terminated() { - self.seal_block(AirTerminator::Goto(merge_id)); - } - self.fixup_block_id(else_id); - } - - self.fixup_block_id_noop(merge_id); - } - - fn lower_while(&mut self, condition: &TypedExpr, body: &TypedStmt, _sp: Option) { - let header_id = self.alloc_block_id(); - let body_id = self.alloc_block_id(); - let exit_id = self.alloc_block_id(); - - self.seal_block(AirTerminator::Goto(header_id)); - - let cond = self.lower_expr(condition); - self.seal_block(AirTerminator::Branch { - cond, - then_block: body_id, - else_block: exit_id, - }); - self.fixup_block_id(header_id); - - self.loop_stack.push(LoopBlocks { - header: header_id, - exit: exit_id, - }); - self.lower_stmt(body); - if !self.last_block_is_terminated() { - self.seal_block(AirTerminator::Goto(header_id)); - } - self.fixup_block_id(body_id); - self.loop_stack.pop(); - - self.fixup_block_id_noop(exit_id); - } - - fn lower_for( - &mut self, - iterator: &str, - start: &TypedExpr, - end: &TypedExpr, - inclusive: bool, - step: &Option, - body: &TypedStmt, - ) { - let start_span = Some(self.span(&start.span)); - let iter_ty = self.lower_type_from_infer(&start.ty); - let iter_local = self.alloc_named_local(iterator, iter_ty.clone(), true, start_span); - let 
start_op = self.lower_expr(start); - self.emit( - AirStmtKind::Assign { - place: Place::Local(iter_local), - rvalue: Rvalue::Use(start_op), - }, - start_span, - ); - - let end_local = self.alloc_temp(iter_ty.clone()); - let end_op = self.lower_expr(end); - self.emit( - AirStmtKind::Assign { - place: Place::Local(end_local), - rvalue: Rvalue::Use(end_op), - }, - Some(self.span(&end.span)), - ); - - let header_id = self.alloc_block_id(); - let body_id = self.alloc_block_id(); - let incr_id = self.alloc_block_id(); - let exit_id = self.alloc_block_id(); - - self.seal_block(AirTerminator::Goto(header_id)); - - let cmp_op = if inclusive { BinOp::Le } else { BinOp::Lt }; - let cond_local = self.alloc_temp(AirType::Bool); - self.emit( - AirStmtKind::Assign { - place: Place::Local(cond_local), - rvalue: Rvalue::BinaryOp( - cmp_op, - Operand::Copy(iter_local), - Operand::Copy(end_local), - ), - }, - None, - ); - self.seal_block(AirTerminator::Branch { - cond: Operand::Copy(cond_local), - then_block: body_id, - else_block: exit_id, - }); - self.fixup_block_id(header_id); - - self.loop_stack.push(LoopBlocks { - header: incr_id, - exit: exit_id, - }); - self.lower_stmt(body); - if !self.last_block_is_terminated() { - self.seal_block(AirTerminator::Goto(incr_id)); - } - self.fixup_block_id(body_id); - self.loop_stack.pop(); - - let step_operand = if let Some(step_expr) = step { - self.lower_expr(step_expr) - } else { - Operand::Const(AirConst::IntLiteral(1)) - }; - self.emit( - AirStmtKind::Assign { - place: Place::Local(iter_local), - rvalue: Rvalue::BinaryOp(BinOp::Add, Operand::Copy(iter_local), step_operand), - }, - None, - ); - self.seal_block(AirTerminator::Goto(header_id)); - self.fixup_block_id(incr_id); - - self.fixup_block_id_noop(exit_id); - } - - fn lower_foreach( - &mut self, - iterator: &str, - iterable: &TypedExpr, - elem_type: &InferType, - body: &TypedStmt, - sp: Option, - ) { - let collection = self.lower_expr(iterable); - let col_ty = 
self.lower_type_from_infer(&iterable.ty); - let col_local = self.alloc_temp(col_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(col_local), - rvalue: Rvalue::Use(collection), - }, - sp, - ); - - let idx_local = self.alloc_temp(AirType::I64); - self.emit( - AirStmtKind::Assign { - place: Place::Local(idx_local), - rvalue: Rvalue::Use(Operand::Const(AirConst::IntLiteral(0))), - }, - None, - ); - - let len_local = self.alloc_temp(AirType::I64); - self.emit( - AirStmtKind::Assign { - place: Place::Local(len_local), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_len".to_string()), - args: vec![Operand::Copy(col_local)], - }, - }, - None, - ); - - let elem_air_ty = self.lower_type_from_infer(elem_type); - let elem_local = self.alloc_named_local(iterator, elem_air_ty, false, sp); - - let header_id = self.alloc_block_id(); - let body_id = self.alloc_block_id(); - let incr_id = self.alloc_block_id(); - let exit_id = self.alloc_block_id(); - - self.seal_block(AirTerminator::Goto(header_id)); - - let cond_local = self.alloc_temp(AirType::Bool); - self.emit( - AirStmtKind::Assign { - place: Place::Local(cond_local), - rvalue: Rvalue::BinaryOp( - BinOp::Lt, - Operand::Copy(idx_local), - Operand::Copy(len_local), - ), - }, - None, - ); - self.seal_block(AirTerminator::Branch { - cond: Operand::Copy(cond_local), - then_block: body_id, - else_block: exit_id, - }); - self.fixup_block_id(header_id); - - self.emit( - AirStmtKind::Assign { - place: Place::Local(elem_local), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_index".to_string()), - args: vec![Operand::Copy(col_local), Operand::Copy(idx_local)], - }, - }, - None, - ); - - self.loop_stack.push(LoopBlocks { - header: incr_id, - exit: exit_id, - }); - self.lower_stmt(body); - if !self.last_block_is_terminated() { - self.seal_block(AirTerminator::Goto(incr_id)); - } - self.fixup_block_id(body_id); - self.loop_stack.pop(); - - self.emit( - AirStmtKind::Assign { - place: Place::Local(idx_local), - 
rvalue: Rvalue::BinaryOp( - BinOp::Add, - Operand::Copy(idx_local), - Operand::Const(AirConst::IntLiteral(1)), - ), - }, - None, - ); - self.seal_block(AirTerminator::Goto(header_id)); - self.fixup_block_id(incr_id); - - self.fixup_block_id_noop(exit_id); - } - - // ======================================================================== - // Block ID fixup helpers - // - // We pre-allocate block IDs, then seal blocks in order. - // After sealing, the block gets the next sequential ID. We fix it up to - // the pre-allocated ID so branch targets stay valid. - // ======================================================================== - - fn fixup_block_id(&mut self, target: BlockId) { - if let Some(block) = self.current_blocks.last_mut() { - let old_id = block.id; - block.id = target; - if old_id != target { - self.block_aliases.push((old_id.0, target.0)); - } - } - } - - fn fixup_block_id_noop(&mut self, target: BlockId) { - self.pending_block_id = Some(target); - } - - fn resolve_block_aliases(&mut self) { - if self.block_aliases.is_empty() { - return; - } - let resolve = |id: &mut BlockId, aliases: &[(u32, u32)]| { - let mut current = id.0; - for _ in 0..aliases.len() { - if let Some(&(_, to)) = aliases.iter().find(|(from, _)| *from == current) { - current = to; - } else { - break; - } - } - *id = BlockId(current); - }; - for block in &mut self.current_blocks { - let aliases = &self.block_aliases; - match &mut block.terminator { - AirTerminator::Goto(id) => resolve(id, aliases), - AirTerminator::Branch { - then_block, - else_block, - .. - } => { - resolve(then_block, aliases); - resolve(else_block, aliases); - } - AirTerminator::Switch { - targets, default, .. - } => { - for (_, id) in targets { - resolve(id, aliases); - } - resolve(default, aliases); - } - AirTerminator::Invoke { normal, unwind, .. 
} => { - resolve(normal, aliases); - resolve(unwind, aliases); - } - AirTerminator::Return(_) - | AirTerminator::Unreachable - | AirTerminator::Unwind - | AirTerminator::Panic { .. } => {} - } - } - self.block_aliases.clear(); - } - - fn last_block_is_terminated(&self) -> bool { - self.current_stmts.is_empty() - && self.current_blocks.last().is_some_and(|b| { - !matches!(b.terminator, AirTerminator::Goto(_)) - || matches!(b.terminator, AirTerminator::Return(_)) - || matches!(b.terminator, AirTerminator::Unreachable) - }) - } - - fn lower_expr(&mut self, expr: &TypedExpr) -> Operand { - let sp = Some(self.span(&expr.span)); - match &expr.kind { - TypedExprKind::Int(v) => { - if expr.ty.is_integer() { - Operand::Const(AirConst::Int(*v, infer_to_int_size(&expr.ty))) - } else { - Operand::Const(AirConst::IntLiteral(*v)) - } - } - TypedExprKind::Float(v) => { - let size = if matches!(expr.ty, InferType::F32) { - AirFloatSize::F32 - } else { - AirFloatSize::F64 - }; - Operand::Const(AirConst::Float(*v, size)) - } - TypedExprKind::Bool(v) => Operand::Const(AirConst::Bool(*v)), - TypedExprKind::String(v) => Operand::Const(AirConst::Str(v.clone())), - TypedExprKind::Null => Operand::Const(AirConst::Null), - - TypedExprKind::Identifier(name) => { - if let Some(id) = self.lookup_local(name) { - Operand::Copy(id) - } else { - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named(format!("__aelys_global_get_{}", name)), - args: Vec::new(), - }, - }, - sp, - ); - Operand::Copy(tmp) - } - } - - TypedExprKind::Binary { left, op, right } => { - let l = self.lower_expr(left); - let r = self.lower_expr(right); - let air_op = lower_binop(op); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::BinaryOp(air_op, l, r), - }, - sp, - ); - 
Operand::Copy(tmp) - } - - TypedExprKind::Unary { op, operand } => { - let inner = self.lower_expr(operand); - let air_op = lower_unop(op); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::UnaryOp(air_op, inner), - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::And { left, right } => self.lower_short_circuit(left, right, true, expr), - - TypedExprKind::Or { left, right } => self.lower_short_circuit(left, right, false, expr), - - TypedExprKind::Call { callee, args } => { - let lowered_args: Vec = args.iter().map(|a| self.lower_expr(a)).collect(); - let func = self.lower_callee(callee); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func, - args: lowered_args, - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::Assign { name, value } => { - let val = self.lower_expr(value); - if let Some(id) = self.lookup_local(name) { - self.emit( - AirStmtKind::Assign { - place: Place::Local(id), - rvalue: Rvalue::Use(val), - }, - sp, - ); - Operand::Copy(id) - } else { - self.emit( - AirStmtKind::CallVoid { - func: Callee::Named(format!("__aelys_global_set_{}", name)), - args: vec![val], - }, - sp, - ); - Operand::Const(AirConst::Null) - } - } - - TypedExprKind::Grouping(inner) => self.lower_expr(inner), - - TypedExprKind::If { - condition, - then_branch, - else_branch, - } => self.lower_if_expr(condition, then_branch, else_branch, expr), - - TypedExprKind::Lambda(inner) => self.lower_expr(inner), - - TypedExprKind::LambdaInner { - params, - return_type, - body, - captures, - } => self.lower_lambda(params, return_type, body, captures, expr), - - TypedExprKind::FmtString(parts) => self.lower_fmt_string(parts, sp), - - TypedExprKind::Member { object, member } => { - let base = 
self.lower_expr(object); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::FieldAccess { - base, - field: member.clone(), - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::StructLiteral { name, fields } => { - let lowered_fields: Vec<(String, Operand)> = fields - .iter() - .map(|(fname, fval)| (fname.clone(), self.lower_expr(fval))) - .collect(); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::StructInit { - name: name.clone(), - fields: lowered_fields, - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::ArrayLiteral { elements, .. } => { - let lowered: Vec = elements.iter().map(|e| self.lower_expr(e)).collect(); - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_array_new".to_string()), - args: lowered, - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::ArraySized { size, .. } => { - let sz = self.lower_expr(size); - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_array_sized".to_string()), - args: vec![sz], - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::VecLiteral { elements, .. 
} => { - let lowered: Vec = elements.iter().map(|e| self.lower_expr(e)).collect(); - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_vec_new".to_string()), - args: lowered, - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::Index { object, index } => { - let obj = self.lower_expr(object); - let idx = self.lower_expr(index); - let result_ty = self.lower_type_from_infer(&expr.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_index".to_string()), - args: vec![obj, idx], - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::IndexAssign { - object, - index, - value, - } => { - let obj = self.lower_expr(object); - let idx = self.lower_expr(index); - let val = self.lower_expr(value); - self.emit( - AirStmtKind::CallVoid { - func: Callee::Named("__aelys_index_set".to_string()), - args: vec![obj, idx, val], - }, - sp, - ); - Operand::Const(AirConst::Null) - } - - TypedExprKind::Range { start, end, .. 
} => { - let mut args = Vec::new(); - if let Some(s) = start { - args.push(self.lower_expr(s)); - } - if let Some(e) = end { - args.push(self.lower_expr(e)); - } - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_range".to_string()), - args, - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::Slice { object, range } => { - let obj = self.lower_expr(object); - let rng = self.lower_expr(range); - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_slice".to_string()), - args: vec![obj, rng], - }, - }, - sp, - ); - Operand::Copy(tmp) - } - - TypedExprKind::Cast { - expr: inner, - target, - } => { - let operand = self.lower_expr(inner); - let from = self.lower_type_from_infer(&inner.ty); - let to = self.lower_type_from_infer(target); - let tmp = self.alloc_temp(to.clone()); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Cast { operand, from, to }, - }, - sp, - ); - Operand::Copy(tmp) - } - } - } - - fn lower_expr_discard(&mut self, expr: &TypedExpr) { - let sp = Some(self.span(&expr.span)); - match &expr.kind { - TypedExprKind::Call { callee, args } => { - let lowered_args: Vec = args.iter().map(|a| self.lower_expr(a)).collect(); - let func = self.lower_callee(callee); - if matches!(expr.ty, InferType::Null) { - self.emit( - AirStmtKind::CallVoid { - func, - args: lowered_args, - }, - sp, - ); - } else { - let tmp = self.alloc_temp(self.lower_type_from_infer(&expr.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func, - args: lowered_args, - }, - }, - sp, - ); - } - } - TypedExprKind::Assign { name, value } => { - let val = self.lower_expr(value); - if let Some(id) = self.lookup_local(name) { - self.emit( - 
AirStmtKind::Assign { - place: Place::Local(id), - rvalue: Rvalue::Use(val), - }, - sp, - ); - } else { - self.emit( - AirStmtKind::CallVoid { - func: Callee::Named(format!("__aelys_global_set_{}", name)), - args: vec![val], - }, - sp, - ); - } - } - TypedExprKind::IndexAssign { - object, - index, - value, - } => { - let obj = self.lower_expr(object); - let idx = self.lower_expr(index); - let val = self.lower_expr(value); - self.emit( - AirStmtKind::CallVoid { - func: Callee::Named("__aelys_index_set".to_string()), - args: vec![obj, idx, val], - }, - sp, - ); - } - _ => { - self.lower_expr(expr); - } - } - } - - fn lower_callee(&mut self, callee: &TypedExpr) -> Callee { - match &callee.kind { - TypedExprKind::Identifier(name) => Callee::Named(name.clone()), - TypedExprKind::Member { object, member } => { - if let TypedExprKind::Identifier(mod_name) = &object.kind { - Callee::Named(format!("{}.{}", mod_name, member)) - } else { - let op = self.lower_expr(callee); - let tmp = match op { - Operand::Copy(id) | Operand::Move(id) => id, - Operand::Const(_) => { - let t = self.alloc_temp(self.lower_type_from_infer(&callee.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(t), - rvalue: Rvalue::Use(op), - }, - None, - ); - t - } - }; - Callee::FnPtr(tmp) - } - } - _ => { - let op = self.lower_expr(callee); - let tmp = match op { - Operand::Copy(id) | Operand::Move(id) => id, - Operand::Const(_) => { - let t = self.alloc_temp(self.lower_type_from_infer(&callee.ty)); - self.emit( - AirStmtKind::Assign { - place: Place::Local(t), - rvalue: Rvalue::Use(op), - }, - None, - ); - t - } - }; - Callee::FnPtr(tmp) - } - } - } - - // short-circuit lowering (and/or) - fn lower_short_circuit( - &mut self, - left: &TypedExpr, - right: &TypedExpr, - is_and: bool, - _parent: &TypedExpr, - ) -> Operand { - let result = self.alloc_temp(AirType::Bool); - let lhs = self.lower_expr(left); - self.emit( - AirStmtKind::Assign { - place: Place::Local(result), - rvalue: 
Rvalue::Use(lhs), - }, - None, - ); - - let eval_right_id = self.alloc_block_id(); - let merge_id = self.alloc_block_id(); - - if is_and { - self.seal_block(AirTerminator::Branch { - cond: Operand::Copy(result), - then_block: eval_right_id, - else_block: merge_id, - }); - } else { - self.seal_block(AirTerminator::Branch { - cond: Operand::Copy(result), - then_block: merge_id, - else_block: eval_right_id, - }); - } - - let rhs = self.lower_expr(right); - self.emit( - AirStmtKind::Assign { - place: Place::Local(result), - rvalue: Rvalue::Use(rhs), - }, - None, - ); - self.seal_block(AirTerminator::Goto(merge_id)); - self.fixup_block_id(eval_right_id); - - self.fixup_block_id_noop(merge_id); - Operand::Copy(result) - } - - // ======================================================================== - // If-expression lowering - // ======================================================================== - - fn lower_if_expr( - &mut self, - condition: &TypedExpr, - then_branch: &TypedExpr, - else_branch: &TypedExpr, - parent: &TypedExpr, - ) -> Operand { - let result_ty = self.lower_type_from_infer(&parent.ty); - let result = self.alloc_temp(result_ty); - - let cond = self.lower_expr(condition); - let then_id = self.alloc_block_id(); - let else_id = self.alloc_block_id(); - let merge_id = self.alloc_block_id(); - - self.seal_block(AirTerminator::Branch { - cond, - then_block: then_id, - else_block: else_id, - }); - - let then_val = self.lower_expr(then_branch); - self.emit( - AirStmtKind::Assign { - place: Place::Local(result), - rvalue: Rvalue::Use(then_val), - }, - None, - ); - self.seal_block(AirTerminator::Goto(merge_id)); - self.fixup_block_id(then_id); - - let else_val = self.lower_expr(else_branch); - self.emit( - AirStmtKind::Assign { - place: Place::Local(result), - rvalue: Rvalue::Use(else_val), - }, - None, - ); - self.seal_block(AirTerminator::Goto(merge_id)); - self.fixup_block_id(else_id); - - self.fixup_block_id_noop(merge_id); - Operand::Copy(result) - } 
- - // lamba lowering (desugarded to closure env struct + function) - fn lower_lambda( - &mut self, - params: &[TypedParam], - return_type: &InferType, - body: &[TypedStmt], - captures: &[(String, InferType)], - parent: &TypedExpr, - ) -> Operand { - let lambda_name = format!("__lambda_{}", self.next_function_id); - let fake_func = TypedFunction { - name: lambda_name.clone(), - type_params: Vec::new(), - params: params.to_vec(), - return_type: return_type.clone(), - body: body.to_vec(), - decorators: Vec::new(), - is_pub: false, - span: parent.span, - captures: captures.to_vec(), - }; - self.lower_function(&fake_func); - - let result_ty = self.lower_type_from_infer(&parent.ty); - let tmp = self.alloc_temp(result_ty); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Use(Operand::Const(AirConst::Null)), - }, - Some(self.span(&parent.span)), - ); - Operand::Copy(tmp) - } - - // format string → __aelys_str_concat / __aelys_to_string - fn lower_fmt_string(&mut self, parts: &[TypedFmtStringPart], sp: Option) -> Operand { - let mut operands: Vec = Vec::new(); - - for part in parts { - match part { - TypedFmtStringPart::Literal(s) => { - operands.push(Operand::Const(AirConst::Str(s.clone()))); - } - TypedFmtStringPart::Expr(expr) => { - let val = self.lower_expr(expr); - if matches!(expr.ty, InferType::String) { - operands.push(val); - } else { - let str_tmp = self.alloc_temp(AirType::Str); - self.emit( - AirStmtKind::Assign { - place: Place::Local(str_tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_to_string".to_string()), - args: vec![val], - }, - }, - None, - ); - operands.push(Operand::Copy(str_tmp)); - } - } - TypedFmtStringPart::Placeholder => { - operands.push(Operand::Const(AirConst::Str(String::new()))); - } - } - } - - if operands.is_empty() { - return Operand::Const(AirConst::Str(String::new())); - } - if operands.len() == 1 { - return operands.into_iter().next().unwrap(); - } - - let mut acc = operands.remove(0); 
- for part in operands { - let tmp = self.alloc_temp(AirType::Str); - self.emit( - AirStmtKind::Assign { - place: Place::Local(tmp), - rvalue: Rvalue::Call { - func: Callee::Named("__aelys_str_concat".to_string()), - args: vec![acc, part], - }, - }, - sp, - ); - acc = Operand::Copy(tmp); - } - acc - } -} - -// helpers -fn infer_to_int_size(ty: &InferType) -> AirIntSize { - match ty { - InferType::I8 => AirIntSize::I8, - InferType::I16 => AirIntSize::I16, - InferType::I32 => AirIntSize::I32, - InferType::I64 => AirIntSize::I64, - InferType::U8 => AirIntSize::U8, - InferType::U16 => AirIntSize::U16, - InferType::U32 => AirIntSize::U32, - InferType::U64 => AirIntSize::U64, - _ => AirIntSize::I64, - } -} - -fn lower_binop(op: &BinaryOp) -> BinOp { - match op { - BinaryOp::Add => BinOp::Add, - BinaryOp::Sub => BinOp::Sub, - BinaryOp::Mul => BinOp::Mul, - BinaryOp::Div => BinOp::Div, - BinaryOp::Mod => BinOp::Rem, - BinaryOp::Eq => BinOp::Eq, - BinaryOp::Ne => BinOp::Ne, - BinaryOp::Lt => BinOp::Lt, - BinaryOp::Le => BinOp::Le, - BinaryOp::Gt => BinOp::Gt, - BinaryOp::Ge => BinOp::Ge, - BinaryOp::Shl => BinOp::Shl, - BinaryOp::Shr => BinOp::Shr, - BinaryOp::BitAnd => BinOp::BitAnd, - BinaryOp::BitOr => BinOp::BitOr, - BinaryOp::BitXor => BinOp::BitXor, - } -} - -fn lower_unop(op: &aelys_syntax::UnaryOp) -> UnOp { - match op { - aelys_syntax::UnaryOp::Neg => UnOp::Neg, - aelys_syntax::UnaryOp::Not => UnOp::Not, - aelys_syntax::UnaryOp::BitNot => UnOp::BitNot, - } -} diff --git a/air/src/lower/expr.rs b/air/src/lower/expr.rs new file mode 100644 index 0000000..f4e4141 --- /dev/null +++ b/air/src/lower/expr.rs @@ -0,0 +1,1316 @@ +use super::{LoweringContext, infer_to_int_size, lower_binop, lower_unop}; +use crate::*; +use aelys_sema::{ + InferType, TypedExpr, TypedExprKind, TypedFmtStringPart, TypedMatchArm, TypedParam, + TypedPattern, TypedStmt, +}; + +impl<'a> LoweringContext<'a> { + /// Returns true for types that have no runtime representation (void, null, + /// 
opaque). Used to skip result assignments in match/if-else branches. + fn is_void_like(ty: &AirType) -> bool { + matches!(ty, AirType::Void | AirType::Opaque) + || matches!(ty, AirType::Ptr(inner) if matches!(inner.as_ref(), AirType::Void)) + } + + pub(super) fn lower_expr(&mut self, expr: &TypedExpr) -> Operand { + let sp = Some(self.span(&expr.span)); + match &expr.kind { + TypedExprKind::Int(v) => { + if expr.ty.is_integer() { + Operand::Const(AirConst::Int(*v, infer_to_int_size(&expr.ty))) + } else { + Operand::Const(AirConst::IntLiteral(*v)) + } + } + TypedExprKind::Float(v) => { + let size = if matches!(expr.ty, InferType::F32) { + AirFloatSize::F32 + } else { + AirFloatSize::F64 + }; + Operand::Const(AirConst::Float(*v, size)) + } + TypedExprKind::Bool(v) => Operand::Const(AirConst::Bool(*v)), + TypedExprKind::String(v) => Operand::Const(AirConst::Str(v.clone())), + TypedExprKind::Null => Operand::Const(AirConst::Null), + + TypedExprKind::Identifier(name) => { + if let Some(id) = self.lookup_local(name) { + Operand::Copy(id) + } else if self.globals.iter().any(|global| global.name == *name) { + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Call { + func: Callee::Named(format!("__aelys_global_get_{}", name)), + args: Vec::new(), + }, + sp, + ) + } else if matches!(expr.ty, InferType::Function { .. }) { + // Named function used as a value: wrap in a fat pointer with + // null env. Same representation as a closure; see lower_lambda. 
+ self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::ClosureCreate { + fn_name: name.clone(), + env: Operand::Const(AirConst::Null), + }, + sp, + ) + } else { + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Call { + func: Callee::Named(format!("__aelys_global_get_{}", name)), + args: Vec::new(), + }, + sp, + ) + } + } + + TypedExprKind::Binary { left, op, right } => { + let l = self.lower_expr(left); + let r = self.lower_expr(right); + let air_op = lower_binop(op); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::BinaryOp(air_op, l, r), + sp, + ) + } + + TypedExprKind::Unary { op, operand } => { + let inner = self.lower_expr(operand); + let air_op = lower_unop(op); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::UnaryOp(air_op, inner), + sp, + ) + } + + TypedExprKind::And { left, right } => self.lower_short_circuit(left, right, true, expr), + + TypedExprKind::Or { left, right } => self.lower_short_circuit(left, right, false, expr), + + TypedExprKind::Call { callee, args } => { + let lowered_args: Vec = args.iter().map(|a| self.lower_expr(a)).collect(); + let func = self.lower_callee(callee); + self.lower_call_common(func, lowered_args, &expr.ty, sp) + } + + TypedExprKind::Assign { name, value } => self.lower_assign_common(name, value, sp), + + TypedExprKind::Grouping(inner) => self.lower_expr(inner), + + TypedExprKind::If { + condition, + then_branch, + else_branch, + } => self.lower_if_expr(condition, then_branch, else_branch, expr), + + TypedExprKind::Lambda(inner) => self.lower_expr(inner), + + TypedExprKind::LambdaInner { + params, + return_type, + body, + captures, + } => self.lower_lambda(params, return_type, body, captures, expr), + + TypedExprKind::FmtString(parts) => self.lower_fmt_string(parts, sp), + + TypedExprKind::Member { object, member } => { + let base = self.lower_expr(object); + self.emit_rvalue_to_temp( + 
self.lower_type_from_infer(&expr.ty), + Rvalue::FieldAccess { + base, + field: member.clone(), + }, + sp, + ) + } + + TypedExprKind::StructLiteral { name, fields } => { + let lowered_fields: Vec<(String, Operand)> = fields + .iter() + .map(|(fname, fval)| (fname.clone(), self.lower_expr(fval))) + .collect(); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::StructInit { + name: name.clone(), + fields: lowered_fields, + }, + sp, + ) + } + + TypedExprKind::ArrayLiteral { elements, .. } => { + let lowered: Vec = elements.iter().map(|e| self.lower_expr(e)).collect(); + let n = lowered.len() as u64; + // extract element type from the array type + let elem_ty = match &expr.ty { + InferType::Array(inner, _) => self.lower_type_from_infer(inner), + other => { + self.report_error(format!( + "ICE: array literal has non-array type `{}` at AIR lowering", + other + )); + AirType::I64 + } + }; + let arr_ty = AirType::Array(Box::new(elem_ty), n); + let arr_local = self.alloc_temp_mut(arr_ty); + for (i, elem_op) in lowered.into_iter().enumerate() { + self.emit( + AirStmtKind::Assign { + place: Place::Index( + arr_local, + Operand::Const(AirConst::IntLiteral(i as i64)), + ), + rvalue: Rvalue::Use(elem_op), + }, + sp, + ); + } + Operand::Copy(arr_local) + } + + TypedExprKind::ArraySized { + size, fill_value, .. 
+ } => { + // extract the const size (no longer panics on non-constant) + let n = match &size.kind { + TypedExprKind::Int(v) => *v as u64, + _ => { + self.report_error( + "unsupported non-constant array size in AIR lowering: \ + ArraySized requires a constant integer size expression" + .to_string(), + ); + // Fallback: treat as zero-length array so lowering can continue + 0 + } + }; + let elem_ty = match &expr.ty { + InferType::Array(inner, _) => self.lower_type_from_infer(inner), + other => { + self.report_error(format!( + "ICE: ArraySized has non-array type `{}` at AIR lowering", + other + )); + AirType::I64 + } + }; + self.check_stack_array_size(&elem_ty, n); + let arr_ty = AirType::Array(Box::new(elem_ty), n); + let arr_local = self.alloc_temp_mut(arr_ty); + // lower fill value or use zero-init + let fill_op = if let Some(fv) = fill_value { + self.lower_expr(fv) + } else { + let elem_ty_for_zero = match &expr.ty { + InferType::Array(inner, _) => self.lower_type_from_infer(inner), + other => { + self.report_error(format!( + "ICE: ArraySized zero-init has non-array type `{}` at AIR lowering", + other + )); + AirType::I64 + } + }; + Operand::Const(AirConst::ZeroInit(elem_ty_for_zero)) + }; + for i in 0..n { + self.emit( + AirStmtKind::Assign { + place: Place::Index( + arr_local, + Operand::Const(AirConst::IntLiteral(i as i64)), + ), + rvalue: Rvalue::Use(fill_op.clone()), + }, + sp, + ); + } + Operand::Copy(arr_local) + } + + TypedExprKind::VecLiteral { elements, .. 
} => { + let lowered: Vec = elements.iter().map(|e| self.lower_expr(e)).collect(); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Call { + func: Callee::Named("__aelys_vec_new".to_string()), + args: lowered, + }, + sp, + ) + } + + TypedExprKind::Index { object, index } => { + let obj = self.lower_expr(object); + let idx = self.lower_expr(index); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Index { + base: obj, + index: idx, + }, + sp, + ) + } + + TypedExprKind::IndexAssign { + object, + index, + value, + } => self.lower_index_assign(object, index, value, sp), + + TypedExprKind::FieldAssign { + object, + field, + value, + } => self.lower_field_assign(object, field, value, sp), + + TypedExprKind::Range { start, end, .. } => { + let mut args = Vec::new(); + if let Some(s) = start { + args.push(self.lower_expr(s)); + } + if let Some(e) = end { + args.push(self.lower_expr(e)); + } + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Call { + func: Callee::Named("__aelys_range".to_string()), + args, + }, + sp, + ) + } + + TypedExprKind::Slice { object, range } => { + let obj = self.lower_expr(object); + let rng = self.lower_expr(range); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::Call { + func: Callee::Named("__aelys_slice".to_string()), + args: vec![obj, rng], + }, + sp, + ) + } + + TypedExprKind::Cast { + expr: inner, + target, + } => { + let operand = self.lower_expr(inner); + let from = self.lower_type_from_infer(&inner.ty); + let to = self.lower_type_from_infer(target); + self.emit_rvalue_to_temp(to.clone(), Rvalue::Cast { operand, from, to }, sp) + } + TypedExprKind::EnumVariant { + enum_name, + variant, + tag, + args, + } => { + let payload: Vec = args.iter().map(|a| self.lower_expr(a)).collect(); + self.emit_rvalue_to_temp( + self.lower_type_from_infer(&expr.ty), + Rvalue::EnumInit { + enum_name: enum_name.clone(), + variant: variant.clone(), 
+ tag: *tag, + payload, + }, + sp, + ) + } + TypedExprKind::Block { stmts, tail } => { + let scope_depth = self.locals_by_name.len(); + for stmt in stmts { + self.lower_stmt(stmt); + } + let result = self.lower_expr(tail); + self.locals_by_name.truncate(scope_depth); + result + } + TypedExprKind::Match { scrutinee, arms } => { + self.lower_match_expr(scrutinee, arms, expr) + } + } + } + + pub(super) fn lower_expr_discard(&mut self, expr: &TypedExpr) { + let sp = Some(self.span(&expr.span)); + match &expr.kind { + TypedExprKind::Call { callee, args } => { + let lowered_args: Vec = args.iter().map(|a| self.lower_expr(a)).collect(); + let func = self.lower_callee(callee); + let ret_ty = self.lower_type_from_infer(&expr.ty); + // Void, Opaque, and Ptr(Void) calls in discard position should emit CallVoid. + if Self::is_void_like(&ret_ty) { + self.emit( + AirStmtKind::CallVoid { + func, + args: lowered_args, + }, + sp, + ); + } else { + self.emit_rvalue_to_temp( + ret_ty, + Rvalue::Call { + func, + args: lowered_args, + }, + sp, + ); + } + } + TypedExprKind::Assign { name, value } => { + self.lower_assign_common(name, value, sp); + } + TypedExprKind::IndexAssign { + object, + index, + value, + } => { + self.lower_index_assign(object, index, value, sp); + } + TypedExprKind::FieldAssign { + object, + field, + value, + } => { + self.lower_field_assign(object, field, value, sp); + } + _ => { + self.lower_expr(expr); + } + } + } + + /// Shared logic for Call expressions: emit CallVoid for void, otherwise assign to temp. + fn lower_call_common( + &mut self, + func: Callee, + args: Vec, + result_infer_ty: &InferType, + sp: Option, + ) -> Operand { + let result_ty = self.lower_type_from_infer(result_infer_ty); + // Void, Opaque, and Ptr(Void) (the null type from InferType::Null, + // used by builtins like print/println) can't be used as values in + // LLVM. Emit CallVoid so codegen never tries to capture the result. 
+ if Self::is_void_like(&result_ty) { + self.emit(AirStmtKind::CallVoid { func, args }, sp); + Operand::Const(AirConst::Null) + } else { + self.emit_rvalue_to_temp(result_ty, Rvalue::Call { func, args }, sp) + } + } + + /// Shared logic for Assign expressions. + fn lower_assign_common(&mut self, name: &str, value: &TypedExpr, sp: Option) -> Operand { + let val = self.lower_expr(value); + if let Some(id) = self.lookup_local(name) { + self.emit( + AirStmtKind::Assign { + place: Place::Local(id), + rvalue: Rvalue::Use(val), + }, + sp, + ); + // If this variable is a closure capture, write the new value back + // to the env struct so future calls see the updated value. + if let Some(env_id) = self.closure_env_param { + if self.closure_captures.contains(name) { + self.emit( + AirStmtKind::Assign { + place: Place::Field(env_id, name.to_string()), + rvalue: Rvalue::Use(Operand::Copy(id)), + }, + sp, + ); + } + } + Operand::Copy(id) + } else { + self.emit( + AirStmtKind::CallVoid { + func: Callee::Named(format!("__aelys_global_set_{}", name)), + args: vec![val], + }, + sp, + ); + Operand::Const(AirConst::Null) + } + } + + /// Shared logic for IndexAssign expressions. + /// + /// Handles `obj[i] = val` where `obj` may be a chain of field accesses + /// (e.g. `buf.data[i] = val`). In that case a read-modify-write is needed: + /// load the array from the parent struct(s), assign into the element, then + /// store the array back up the chain. + fn lower_index_assign( + &mut self, + object: &TypedExpr, + index: &TypedExpr, + value: &TypedExpr, + sp: Option, + ) -> Operand { + let idx = self.lower_expr(index); + + // Detect compound index assignment pattern from parser desugaring: + // arr[idx] += rhs → arr[idx] = arr[idx] + rhs + // The parser clones index/object expressions, so they'd be re-evaluated + // on the RHS (wrong if they have side effects). Defer value computation + // to after the path is established so we can reuse the path locals. 
+ let compound_info = if let TypedExprKind::Binary { left, op, right } = &value.kind { + if let TypedExprKind::Index { .. } = &left.kind { + Some((*op, right.as_ref())) + } else { + None + } + } else { + None + }; + + let val = if compound_info.is_none() { + self.lower_expr(value) + } else { + Operand::Const(AirConst::Null) // placeholder, computed below + }; + + // Collect the full access path (mix of Member and Index layers) from + // `object` back to the root variable. Handles all patterns: + // arr[i] = val, arr[i][j] = val, s.arr[i] = val, + // s.arr[i][j] = val, rows[i].cells[j] = val, etc. + enum PathStep<'a> { + Field { name: String, result_ty: AirType }, + Index { idx_expr: &'a TypedExpr, result_ty: AirType }, + } + + let mut steps: Vec> = Vec::new(); + let mut walk = object; + loop { + match &walk.kind { + TypedExprKind::Index { object, index: nested_idx } => { + let result_ty = self.lower_type_from_infer(&walk.ty); + steps.push(PathStep::Index { idx_expr: nested_idx, result_ty }); + walk = object; + } + TypedExprKind::Member { object, member } => { + let result_ty = self.lower_type_from_infer(&walk.ty); + steps.push(PathStep::Field { name: member.clone(), result_ty }); + walk = object; + } + _ => break, + } + } + steps.reverse(); // root → outermost + + // `walk` is now the root expression (usually an Identifier). + let root_name = if let TypedExprKind::Identifier(name) = &walk.kind { + Some(name.clone()) + } else { + None + }; + let root_op = self.lower_expr(walk); + let root_ty = self.lower_type_from_infer(&walk.ty); + let root_local = self.operand_to_local(root_op, &root_ty); + + // Read phase: load each intermediate into a mutable temp. 
+ enum WriteBack { + Field(String), + Index(Operand), + } + struct TempInfo { + local: LocalId, + parent: LocalId, + wb: WriteBack, + } + + let mut temps: Vec = Vec::new(); + let mut cur_local = root_local; + + for step in &steps { + match step { + PathStep::Field { name, result_ty } => { + let tmp = self.alloc_temp_mut(result_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue: Rvalue::FieldAccess { + base: Operand::Copy(cur_local), + field: name.clone(), + }, + }, + sp, + ); + temps.push(TempInfo { local: tmp, parent: cur_local, wb: WriteBack::Field(name.clone()) }); + cur_local = tmp; + } + PathStep::Index { idx_expr, result_ty } => { + let index_op = self.lower_expr(idx_expr); + let tmp = self.alloc_temp_mut(result_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue: Rvalue::Index { + base: Operand::Copy(cur_local), + index: index_op.clone(), + }, + }, + sp, + ); + temps.push(TempInfo { local: tmp, parent: cur_local, wb: WriteBack::Index(index_op) }); + cur_local = tmp; + } + } + } + + // For compound index assigns, compute the value now using the path-loaded + // cur_local instead of re-evaluating the object path. + let final_val = if let Some((op, rhs_expr)) = compound_info { + let elem_ty = self.lower_type_from_infer(&value.ty); + let current = self.emit_rvalue_to_temp( + elem_ty.clone(), + Rvalue::Index { + base: Operand::Copy(cur_local), + index: idx.clone(), + }, + sp, + ); + let rhs = self.lower_expr(rhs_expr); + let air_op = super::lower_binop(&op); + self.emit_rvalue_to_temp( + elem_ty, + Rvalue::BinaryOp(air_op, current, rhs), + sp, + ) + } else { + val + }; + + // Write the value at the innermost level. + self.emit( + AirStmtKind::Assign { + place: Place::Index(cur_local, idx), + rvalue: Rvalue::Use(final_val), + }, + sp, + ); + + // Write-back phase: propagate modifications back up to the root. 
+ for info in temps.iter().rev() { + match &info.wb { + WriteBack::Field(name) => { + self.emit( + AirStmtKind::Assign { + place: Place::Field(info.parent, name.clone()), + rvalue: Rvalue::Use(Operand::Copy(info.local)), + }, + sp, + ); + } + WriteBack::Index(index_op) => { + self.emit( + AirStmtKind::Assign { + place: Place::Index(info.parent, index_op.clone()), + rvalue: Rvalue::Use(Operand::Copy(info.local)), + }, + sp, + ); + } + } + } + + // Closure env / global write-back for root. + if let Some(ref name) = root_name { + if let Some(env_id) = self.closure_env_param { + if self.closure_captures.contains(name) { + self.emit( + AirStmtKind::Assign { + place: Place::Field(env_id, name.clone()), + rvalue: Rvalue::Use(Operand::Copy(root_local)), + }, + sp, + ); + } + } + if self.lookup_local(name).is_none() + && self.globals.iter().any(|g| g.name == *name) + { + self.emit( + AirStmtKind::CallVoid { + func: Callee::Named(format!("__aelys_global_set_{}", name)), + args: vec![Operand::Copy(root_local)], + }, + sp, + ); + } + } + + Operand::Const(AirConst::Null) + } + + fn lower_field_assign( + &mut self, + object: &TypedExpr, + field: &str, + value: &TypedExpr, + sp: Option, + ) -> Operand { + // Detect compound field assignment from parser desugaring: + // obj.field += rhs → obj.field = obj.field + rhs + // When the object path contains side-effecting expressions (e.g. + // arr[f()].field += rhs), the desugared form evaluates them twice. + // Detect the pattern and defer value computation to after the path + // is established, so we can reuse the already-loaded current value. + let compound_info = if let TypedExprKind::Binary { left, op, right } = &value.kind { + if let TypedExprKind::Member { .. } = &left.kind { + Some((*op, right.as_ref())) + } else { + None + } + } else { + None + }; + + // For non-compound assigns, lower the value normally. + // For compound assigns, we'll compute val later using the path locals. 
+ let val = if compound_info.is_none() { + self.lower_expr(value) + } else { + // Placeholder — will be replaced below + Operand::Const(AirConst::Null) + }; + + // Collect the full access path from root → immediate parent of the + // assigned field. Each step is either a `.field` or `[idx]` access. + // This handles arbitrary mixes like `a.b[i].c.d[j].field = val`. + enum PathStep<'a> { + Field { name: String, result_ty: AirType }, + Index { idx_expr: &'a TypedExpr, result_ty: AirType }, + } + + let mut steps: Vec> = Vec::new(); + let mut current = object; + loop { + match ¤t.kind { + TypedExprKind::Member { object: inner, member } => { + let result_ty = self.lower_type_from_infer(¤t.ty); + steps.push(PathStep::Field { name: member.clone(), result_ty }); + current = inner; + } + TypedExprKind::Index { object: inner, index: idx_expr } => { + let result_ty = self.lower_type_from_infer(¤t.ty); + steps.push(PathStep::Index { idx_expr, result_ty }); + current = inner; + } + _ => break, + } + } + steps.reverse(); // root → deepest + + // `current` is now the root expression (usually an Identifier). + let root_name = if let TypedExprKind::Identifier(name) = ¤t.kind { + Some(name.clone()) + } else { + None + }; + let root_op = self.lower_expr(current); + let root_ty = self.lower_type_from_infer(¤t.ty); + let root_local = self.operand_to_local(root_op, &root_ty); + + // Read phase: load each intermediate into a mutable temp. 
+ enum WriteBack { + Field(String), + Index(Operand), + } + struct TempInfo { + local: LocalId, + parent: LocalId, + wb: WriteBack, + } + + let mut temps: Vec = Vec::new(); + let mut cur_local = root_local; + + for step in &steps { + match step { + PathStep::Field { name, result_ty } => { + let tmp = self.alloc_temp_mut(result_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue: Rvalue::FieldAccess { + base: Operand::Copy(cur_local), + field: name.clone(), + }, + }, + sp, + ); + temps.push(TempInfo { local: tmp, parent: cur_local, wb: WriteBack::Field(name.clone()) }); + cur_local = tmp; + } + PathStep::Index { idx_expr, result_ty } => { + let idx_op = self.lower_expr(idx_expr); + let tmp = self.alloc_temp_mut(result_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue: Rvalue::Index { + base: Operand::Copy(cur_local), + index: idx_op.clone(), + }, + }, + sp, + ); + temps.push(TempInfo { local: tmp, parent: cur_local, wb: WriteBack::Index(idx_op) }); + cur_local = tmp; + } + } + } + + // For compound field assigns, compute the value now using the path-loaded + // intermediates instead of the pre-lowered val (which would re-evaluate + // side-effecting path expressions). + let final_val = if let Some((op, rhs_expr)) = compound_info { + let field_ty = self.lower_type_from_infer(&value.ty); + let current = self.emit_rvalue_to_temp( + field_ty.clone(), + Rvalue::FieldAccess { + base: Operand::Copy(cur_local), + field: field.to_string(), + }, + sp, + ); + let rhs = self.lower_expr(rhs_expr); + let air_op = super::lower_binop(&op); + self.emit_rvalue_to_temp( + field_ty, + Rvalue::BinaryOp(air_op, current, rhs), + sp, + ) + } else { + val + }; + + // Write the final field on the deepest temp. + self.emit( + AirStmtKind::Assign { + place: Place::Field(cur_local, field.to_string()), + rvalue: Rvalue::Use(final_val), + }, + sp, + ); + + // Write-back phase: propagate modifications back up to the root. 
+ for info in temps.iter().rev() { + match &info.wb { + WriteBack::Field(name) => { + self.emit( + AirStmtKind::Assign { + place: Place::Field(info.parent, name.clone()), + rvalue: Rvalue::Use(Operand::Copy(info.local)), + }, + sp, + ); + } + WriteBack::Index(idx_op) => { + self.emit( + AirStmtKind::Assign { + place: Place::Index(info.parent, idx_op.clone()), + rvalue: Rvalue::Use(Operand::Copy(info.local)), + }, + sp, + ); + } + } + } + + // Write back to closure env or global store if needed. + if let Some(ref name) = root_name { + if let Some(env_id) = self.closure_env_param { + if self.closure_captures.contains(name) { + self.emit( + AirStmtKind::Assign { + place: Place::Field(env_id, name.clone()), + rvalue: Rvalue::Use(Operand::Copy(root_local)), + }, + sp, + ); + } + } + if self.lookup_local(name).is_none() + && self.globals.iter().any(|g| g.name == *name) + { + self.emit( + AirStmtKind::CallVoid { + func: Callee::Named(format!("__aelys_global_set_{}", name)), + args: vec![Operand::Copy(root_local)], + }, + sp, + ); + } + } + + Operand::Const(AirConst::Null) + } + + fn lower_callee(&mut self, callee: &TypedExpr) -> Callee { + match &callee.kind { + TypedExprKind::Identifier(name) => { + if let Some(id) = self.lookup_local(name) { + Callee::FnPtr(id) + } else if self.globals.iter().any(|global| global.name == *name) { + // A callable file-scope let is still data in global storage; lower the + // callee through the global getter so calls stay indirect. 
+ let op = self.lower_expr(callee); + let ty = self.lower_type_from_infer(&callee.ty); + Callee::FnPtr(self.operand_to_local(op, &ty)) + } else { + Callee::Named(name.clone()) + } + } + TypedExprKind::Member { object, member } => { + if let TypedExprKind::Identifier(mod_name) = &object.kind { + let is_runtime_value = self.lookup_local(mod_name).is_some() + || self.globals.iter().any(|global| global.name == *mod_name); + if !is_runtime_value { + Callee::Named(format!("{}.{}", mod_name, member)) + } else { + // `value.field()` on a struct/global fnptr field must stay indirect. + let op = self.lower_expr(callee); + let ty = self.lower_type_from_infer(&callee.ty); + Callee::FnPtr(self.operand_to_local(op, &ty)) + } + } else { + let op = self.lower_expr(callee); + let ty = self.lower_type_from_infer(&callee.ty); + Callee::FnPtr(self.operand_to_local(op, &ty)) + } + } + _ => { + let op = self.lower_expr(callee); + let ty = self.lower_type_from_infer(&callee.ty); + Callee::FnPtr(self.operand_to_local(op, &ty)) + } + } + } + + // short-circuit lowering (and/or) + fn lower_short_circuit( + &mut self, + left: &TypedExpr, + right: &TypedExpr, + is_and: bool, + _parent: &TypedExpr, + ) -> Operand { + let result = self.alloc_temp_mut(AirType::Bool); + let lhs = self.lower_expr(left); + self.emit( + AirStmtKind::Assign { + place: Place::Local(result), + rvalue: Rvalue::Use(lhs), + }, + None, + ); + + let eval_right_id = self.alloc_block_id(); + let merge_id = self.alloc_block_id(); + + if is_and { + self.seal_block(AirTerminator::Branch { + cond: Operand::Copy(result), + then_block: eval_right_id, + else_block: merge_id, + }); + } else { + self.seal_block(AirTerminator::Branch { + cond: Operand::Copy(result), + then_block: merge_id, + else_block: eval_right_id, + }); + } + + self.fixup_block_id_noop(eval_right_id); + let rhs = self.lower_expr(right); + self.emit( + AirStmtKind::Assign { + place: Place::Local(result), + rvalue: Rvalue::Use(rhs), + }, + None, + ); + 
self.seal_block(AirTerminator::Goto(merge_id));
+
+        self.fixup_block_id_noop(merge_id);
+        Operand::Copy(result)
+    }
+
+    // if/else expression lowering.
+    //
+    // Emits a Branch on the condition into a `then` block and an `else` block,
+    // both of which jump to a shared merge block. When the expression has a
+    // non-void type, each branch stores its value into one shared result local
+    // and the expression evaluates to a copy of it; otherwise both branches are
+    // lowered for their effects only and the expression evaluates to Null.
+    fn lower_if_expr(
+        &mut self,
+        condition: &TypedExpr,
+        then_branch: &TypedExpr,
+        else_branch: &TypedExpr,
+        parent: &TypedExpr,
+    ) -> Operand {
+        let value_ty = self.lower_type_from_infer(&parent.ty);
+        // The slot is assigned from both branch blocks, hence the mutable temp.
+        let slot = if Self::is_void_like(&value_ty) {
+            None
+        } else {
+            Some(self.alloc_temp_mut(value_ty))
+        };
+
+        let cond = self.lower_expr(condition);
+        let then_id = self.alloc_block_id();
+        let else_id = self.alloc_block_id();
+        let merge_id = self.alloc_block_id();
+
+        self.seal_block(AirTerminator::Branch {
+            cond,
+            then_block: then_id,
+            else_block: else_id,
+        });
+
+        // Both branches have the same shape: open the block, lower the branch,
+        // optionally store its value, then fall through to the merge block.
+        for (block_id, branch) in [(then_id, then_branch), (else_id, else_branch)] {
+            self.fixup_block_id_noop(block_id);
+            match slot {
+                Some(dest) => {
+                    let branch_val = self.lower_expr(branch);
+                    self.emit(
+                        AirStmtKind::Assign {
+                            place: Place::Local(dest),
+                            rvalue: Rvalue::Use(branch_val),
+                        },
+                        None,
+                    );
+                }
+                None => {
+                    self.lower_expr_discard(branch);
+                }
+            }
+            self.seal_block(AirTerminator::Goto(merge_id));
+        }
+
+        self.fixup_block_id_noop(merge_id);
+        match slot {
+            Some(dest) => Operand::Copy(dest),
+            None => Operand::Const(AirConst::Null),
+        }
+    }
+
+    // Lambda lowering: closures and function values in Aelys
+    //
+    // Every lambda, capturing or not, is lowered as a closure with an __env
+    // parameter and wrapped in a fat pointer { fn_ptr, env_ptr }. Named functions
+    // used as values also get wrapped in a fat pointer (with env_ptr = null).
+    //
+    // This uniformity is load-bearing: a call site receiving `fn(i64) -> i64`
+    // cannot know whether it got a named function, a non-capturing lambda, or a
+    // capturing closure.
If these had different representations, indirect calls + // would need two codepaths and the type system would need to track the + // distinction. The alternative (generating thunks per named function, like + // OCaml) was rejected for the same reason: more AIR, more generated code, + // more surface for bugs. + // + // For capturing closures, the env struct is heap-allocated via Alloc (malloc). + // Stack allocation would be unsound: closures can escape their creation scope + // (returned from functions, stored in structs), and the env would dangle. + // Escape analysis to decide stack vs heap is not implemented. The env is + // intentionally leaked; the GC (@no_gc is the opt-out) will trace these + // allocations once it exists. The representation won't need to change. + // + // Captures are by value at creation time. Mutating the original variable after + // closure creation does not affect what the closure sees. + fn lower_lambda( + &mut self, + params: &[TypedParam], + return_type: &InferType, + body: &[TypedStmt], + captures: &[(String, InferType)], + parent: &TypedExpr, + ) -> Operand { + use aelys_sema::TypedFunction; + + let lambda_name = format!("__lambda_{}", self.next_function_id); + let fake_func = TypedFunction { + name: lambda_name.clone(), + type_params: Vec::new(), + params: params.to_vec(), + return_type: return_type.clone(), + body: body.to_vec(), + decorators: Vec::new(), + is_pub: false, + span: parent.span, + captures: captures.to_vec(), + }; + // Always go through the closure path so every lambda gets an __env + // parameter, ensuring a uniform calling convention for all function values. 
+        self.lower_function_as_closure(&fake_func);
+
+        let sp = Some(self.span(&parent.span));
+        let result_ty = self.lower_type_from_infer(&parent.ty);
+        let runtime_caps = self.runtime_captures(captures);
+
+        if runtime_caps.is_empty() {
+            // Non-capturing: fat pointer with null env
+            self.emit_rvalue_to_temp(
+                result_ty,
+                Rvalue::ClosureCreate {
+                    fn_name: lambda_name,
+                    env: Operand::Const(AirConst::Null),
+                },
+                sp,
+            )
+        } else {
+            // Capturing: heap-allocate env, store captures, build fat pointer
+            let env_name = format!("__closure_env_{}", lambda_name);
+            let env_ptr_ty = AirType::Ptr(Box::new(AirType::Struct(env_name.clone())));
+            let env_ptr = self.alloc_temp(env_ptr_ty.clone());
+            self.emit(
+                AirStmtKind::Alloc {
+                    local: env_ptr,
+                    ty: AirType::Struct(env_name.clone()),
+                },
+                sp,
+            );
+            // Store each captured value into the env struct
+            for (cap_name, _cap_ty) in &runtime_caps {
+                let cap_val = if let Some(id) = self.lookup_local(cap_name) {
+                    Operand::Copy(id)
+                } else {
+                    self.report_error(format!(
+                        "ICE: captured variable `{}` not found in scope during closure lowering",
+                        cap_name
+                    ));
+                    continue;
+                };
+                self.emit(
+                    AirStmtKind::Assign {
+                        place: Place::Field(env_ptr, cap_name.clone()),
+                        rvalue: Rvalue::Use(cap_val),
+                    },
+                    sp,
+                );
+            }
+            // Build fat pointer { fn_ptr, env_ptr }
+            self.emit_rvalue_to_temp(
+                result_ty,
+                Rvalue::ClosureCreate {
+                    fn_name: lambda_name,
+                    env: Operand::Copy(env_ptr),
+                },
+                sp,
+            )
+        }
+    }
+
+    // match expression lowering (enum scrutinee only).
+    //
+    // Shape of the emitted control flow:
+    //   current block: lower scrutinee, read its tag, Switch on the tag
+    //   one block per variant arm: bind payload fields, lower body, Goto merge
+    //   default block: the wildcard arm's body, or Unreachable when there is none
+    //   merge block: lowering continues here
+    //
+    // Variant arms are lowered before the wildcard arm regardless of source
+    // order; if several wildcard arms are present the last one is used.
+    //
+    // A non-void match stores each arm's value into one shared result local and
+    // evaluates to a copy of it; a void-like match evaluates to Null. A scrutinee
+    // whose type is not an enum is reported as a lowering error and yields Null.
+    fn lower_match_expr(
+        &mut self,
+        scrutinee: &TypedExpr,
+        arms: &[TypedMatchArm],
+        parent: &TypedExpr,
+    ) -> Operand {
+        let sp = Some(self.span(&parent.span));
+        let result_ty = self.lower_type_from_infer(&parent.ty);
+        let is_void = Self::is_void_like(&result_ty);
+        // Only allocate a result local when the match produces a value.
+        let result = if is_void { None } else { Some(self.alloc_temp_mut(result_ty)) };
+
+        // Lower the scrutinee
+        let scrutinee_op = self.lower_expr(scrutinee);
+
+        // Get the enum name from the scrutinee type
+        let enum_name = match &scrutinee.ty {
+            InferType::Enum(name, _) => name.clone(),
+            _ => {
+                self.report_error(format!(
+                    "match scrutinee is not an enum type: {:?}",
+                    scrutinee.ty
+                ));
+                return Operand::Const(AirConst::Null);
+            }
+        };
+
+        // Extract the tag
+        let tag_op = self.emit_rvalue_to_temp(
+            AirType::I32,
+            Rvalue::EnumTag {
+                enum_name,
+                operand: scrutinee_op.clone(),
+            },
+            sp,
+        );
+
+        // Allocate blocks: one per arm + merge block
+        let merge_id = self.alloc_block_id();
+
+        // Separate variant arms from wildcard
+        let mut switch_targets: Vec<(AirConst, BlockId)> = Vec::new();
+        let mut wildcard_arm: Option<&TypedMatchArm> = None;
+        let mut arm_blocks: Vec<(BlockId, &TypedMatchArm)> = Vec::new();
+
+        for arm in arms {
+            match &arm.pattern {
+                TypedPattern::Variant { tag, .. } => {
+                    let block_id = self.alloc_block_id();
+                    switch_targets.push((AirConst::Int(*tag as i64, AirIntSize::I32), block_id));
+                    arm_blocks.push((block_id, arm));
+                }
+                TypedPattern::Wildcard => {
+                    wildcard_arm = Some(arm);
+                }
+            }
+        }
+
+        // Allocate a default block for the wildcard (or unreachable if exhaustive)
+        let default_id = self.alloc_block_id();
+
+        // Seal current block with Switch terminator
+        self.seal_block(AirTerminator::Switch {
+            discr: tag_op,
+            targets: switch_targets,
+            default: default_id,
+        });
+
+        // Lower each variant arm
+        for (block_id, arm) in arm_blocks {
+            self.fixup_block_id_noop(block_id);
+
+            // Payload bindings are visible only inside this arm. Remember the
+            // name-scope depth so they can be popped once the body is lowered;
+            // otherwise they would shadow same-named outer variables in later
+            // arms and in the code following the match.
+            let scope_depth = self.locals_by_name.len();
+
+            // Bind payload fields if the pattern has bindings
+            if let TypedPattern::Variant {
+                enum_name: arm_enum_name,
+                tag,
+                bindings,
+                ..
+            } = &arm.pattern
+            {
+                for (field_index, (name, ty)) in bindings.iter().enumerate() {
+                    let field_ty = self.lower_type_from_infer(ty);
+                    let field_local = self.alloc_named_local(name, field_ty, false, sp);
+                    self.emit(
+                        AirStmtKind::Assign {
+                            place: Place::Local(field_local),
+                            rvalue: Rvalue::EnumPayload {
+                                enum_name: arm_enum_name.clone(),
+                                tag: *tag,
+                                operand: scrutinee_op.clone(),
+                                field_index: field_index as u32,
+                            },
+                        },
+                        sp,
+                    );
+                }
+            }
+
+            if let Some(result) = result {
+                let arm_val = self.lower_expr(&arm.body);
+                self.emit(
+                    AirStmtKind::Assign {
+                        place: Place::Local(result),
+                        rvalue: Rvalue::Use(arm_val),
+                    },
+                    None,
+                );
+            } else {
+                self.lower_expr_discard(&arm.body);
+            }
+            self.seal_block(AirTerminator::Goto(merge_id));
+
+            // Leave the arm's scope: drop its payload bindings.
+            self.locals_by_name.truncate(scope_depth);
+        }
+
+        // Lower the default block (wildcard or unreachable)
+        self.fixup_block_id_noop(default_id);
+        if let Some(wildcard) = wildcard_arm {
+            if let Some(result) = result {
+                let wc_val = self.lower_expr(&wildcard.body);
+                self.emit(
+                    AirStmtKind::Assign {
+                        place: Place::Local(result),
+                        rvalue: Rvalue::Use(wc_val),
+                    },
+                    None,
+                );
+            } else {
+                self.lower_expr_discard(&wildcard.body);
+            }
+            self.seal_block(AirTerminator::Goto(merge_id));
+        } else {
+            // Exhaustive match without wildcard -- all variants covered, default is unreachable
+            self.seal_block(AirTerminator::Unreachable);
+        }
+
+        self.fixup_block_id_noop(merge_id);
+        result.map_or(Operand::Const(AirConst::Null), Operand::Copy)
+    }
+
+    // format string -> __aelys_str_concat / __aelys_to_string
+    fn lower_fmt_string(&mut self, parts: &[TypedFmtStringPart], sp: Option) -> Operand {
+        let mut operands: Vec = Vec::new();
+
+        for part in parts {
+            match part {
+                TypedFmtStringPart::Literal(s) => {
+                    operands.push(Operand::Const(AirConst::Str(s.clone())));
+                }
+                TypedFmtStringPart::Expr(expr) => {
+                    let val = self.lower_expr(expr);
+                    if matches!(expr.ty, InferType::String) {
+                        operands.push(val);
+                    } else {
+                        let converted = self.emit_rvalue_to_temp(
+
AirType::Str, + Rvalue::Call { + func: Callee::Named("__aelys_to_string".to_string()), + args: vec![val], + }, + None, + ); + operands.push(converted); + } + } + TypedFmtStringPart::Placeholder => { + operands.push(Operand::Const(AirConst::Str(String::new()))); + } + } + } + + if operands.is_empty() { + return Operand::Const(AirConst::Str(String::new())); + } + if operands.len() == 1 { + return operands.into_iter().next().unwrap(); + } + + let mut acc = operands.remove(0); + for part in operands { + acc = self.emit_rvalue_to_temp( + AirType::Str, + Rvalue::Call { + func: Callee::Named("__aelys_str_concat".to_string()), + args: vec![acc, part], + }, + sp, + ); + } + acc + } +} diff --git a/air/src/lower/loops.rs b/air/src/lower/loops.rs new file mode 100644 index 0000000..2d407b6 --- /dev/null +++ b/air/src/lower/loops.rs @@ -0,0 +1,351 @@ +use super::LoweringContext; +use crate::*; +use aelys_sema::{InferType, TypedExpr, TypedExprKind, TypedStmt}; +use aelys_syntax::UnaryOp; + +impl<'a> LoweringContext<'a> { + pub(super) fn lower_for( + &mut self, + iterator: &str, + start: &TypedExpr, + end: &TypedExpr, + inclusive: bool, + step: &Option, + body: &TypedStmt, + ) { + let start_span = Some(self.span(&start.span)); + let iter_ty = self.lower_type_from_infer(&start.ty); + + // Evaluate range bounds BEFORE allocating the iterator local so that + // any reference to `iterator` in the bounds resolves to the outer + // binding (e.g. `for n in 0..n` where the bound `n` is a param). + let start_op = self.lower_expr(start); + let end_op = self.lower_expr(end); + + // Save scope so the iterator variable doesn't leak into the enclosing + // scope after the loop (e.g. `let i = 999; for i in 0..5 { ... }; use(i)` + // should still see i=999 after the loop). 
+ let scope_depth = self.locals_by_name.len(); + + let iter_local = self.alloc_named_local(iterator, iter_ty.clone(), true, start_span); + self.emit( + AirStmtKind::Assign { + place: Place::Local(iter_local), + rvalue: Rvalue::Use(start_op), + }, + start_span, + ); + + let end_local = self.alloc_temp(iter_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(end_local), + rvalue: Rvalue::Use(end_op), + }, + Some(self.span(&end.span)), + ); + + // Evaluate the step operand ONCE in the entry block so it is not + // re-read on every iteration (the variable might be mutated in the + // loop body). + let step_operand = if let Some(step_expr) = step { + self.lower_expr(step_expr) + } else { + let step_c = iter_ty + .int_size() + .map(|s| AirConst::Int(1, s)) + .unwrap_or(AirConst::IntLiteral(1)); + Operand::Const(step_c) + }; + let step_local = self.alloc_temp(iter_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(step_local), + rvalue: Rvalue::Use(step_operand), + }, + None, + ); + + let header_id = self.alloc_block_id(); + let body_id = self.alloc_block_id(); + let incr_id = self.alloc_block_id(); + let exit_id = self.alloc_block_id(); + + self.seal_block(AirTerminator::Goto(header_id)); + + self.fixup_block_id_noop(header_id); + // For negative steps the iteration condition is reversed: + // positive step: iter < end (or <= for inclusive) + // negative step: iter > end (or >= for inclusive) + // + // If the step is a compile-time constant we pick the direction + // statically. Otherwise we emit a runtime sign check. + let step_is_negative = step.as_ref().is_some_and(|s| step_expr_is_negative(s)); + let step_is_const = step.as_ref().map_or(true, |s| step_expr_is_negative(s) || step_expr_is_positive(s)); + + let cond_local = self.alloc_temp(AirType::Bool); + if step_is_const { + // Static direction — single comparison. 
+ let cmp_op = if step_is_negative { + if inclusive { BinOp::Ge } else { BinOp::Gt } + } else { + if inclusive { BinOp::Le } else { BinOp::Lt } + }; + self.emit( + AirStmtKind::Assign { + place: Place::Local(cond_local), + rvalue: Rvalue::BinaryOp( + cmp_op, + Operand::Copy(iter_local), + Operand::Copy(end_local), + ), + }, + None, + ); + } else { + // Dynamic step: emit runtime sign check. + // step_neg = step < 0 + // fwd_cmp = iter < end (or <=) + // bwd_cmp = iter > end (or >=) + // cond = step_neg ? bwd_cmp : fwd_cmp + let zero = Operand::Const(iter_ty + .int_size() + .map(|s| AirConst::Int(0, s)) + .unwrap_or(AirConst::IntLiteral(0))); + let step_neg_local = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(step_neg_local), + rvalue: Rvalue::BinaryOp(BinOp::Lt, Operand::Copy(step_local), zero), + }, + None, + ); + let fwd_op = if inclusive { BinOp::Le } else { BinOp::Lt }; + let bwd_op = if inclusive { BinOp::Ge } else { BinOp::Gt }; + let fwd_local = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(fwd_local), + rvalue: Rvalue::BinaryOp(fwd_op, Operand::Copy(iter_local), Operand::Copy(end_local)), + }, + None, + ); + let bwd_local = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(bwd_local), + rvalue: Rvalue::BinaryOp(bwd_op, Operand::Copy(iter_local), Operand::Copy(end_local)), + }, + None, + ); + // cond = if step_neg { bwd } else { fwd } + // Lowered as: cond = (step_neg & bwd) | (!step_neg & fwd) + let neg_and_bwd = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(neg_and_bwd), + rvalue: Rvalue::BinaryOp(BinOp::And, Operand::Copy(step_neg_local), Operand::Copy(bwd_local)), + }, + None, + ); + let not_neg = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(not_neg), + rvalue: Rvalue::UnaryOp(UnOp::Not, Operand::Copy(step_neg_local)), + }, + None, + 
); + let pos_and_fwd = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(pos_and_fwd), + rvalue: Rvalue::BinaryOp(BinOp::And, Operand::Copy(not_neg), Operand::Copy(fwd_local)), + }, + None, + ); + self.emit( + AirStmtKind::Assign { + place: Place::Local(cond_local), + rvalue: Rvalue::BinaryOp(BinOp::Or, Operand::Copy(neg_and_bwd), Operand::Copy(pos_and_fwd)), + }, + None, + ); + } + self.seal_block(AirTerminator::Branch { + cond: Operand::Copy(cond_local), + then_block: body_id, + else_block: exit_id, + }); + + self.loop_stack.push(super::LoopBlocks { + header: incr_id, + exit: exit_id, + }); + self.fixup_block_id_noop(body_id); + self.lower_stmt(body); + if !self.last_block_is_terminated() { + self.seal_block(AirTerminator::Goto(incr_id)); + } + self.loop_stack.pop(); + + self.fixup_block_id_noop(incr_id); + self.emit( + AirStmtKind::Assign { + place: Place::Local(iter_local), + rvalue: Rvalue::BinaryOp(BinOp::Add, Operand::Copy(iter_local), Operand::Copy(step_local)), + }, + None, + ); + self.seal_block(AirTerminator::Goto(header_id)); + + self.fixup_block_id_noop(exit_id); + self.locals_by_name.truncate(scope_depth); + } + + pub(super) fn lower_foreach( + &mut self, + iterator: &str, + iterable: &TypedExpr, + elem_type: &InferType, + body: &TypedStmt, + sp: Option, + ) { + let collection = self.lower_expr(iterable); + let col_ty = self.lower_type_from_infer(&iterable.ty); + let col_local = self.alloc_temp(col_ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(col_local), + rvalue: Rvalue::Use(collection), + }, + sp, + ); + + let idx_local = self.alloc_temp_mut(AirType::I64); + self.emit( + AirStmtKind::Assign { + place: Place::Local(idx_local), + rvalue: Rvalue::Use(Operand::Const(AirConst::IntLiteral(0))), + }, + None, + ); + + let len_local = self.alloc_temp(AirType::I64); + let len_rvalue = match &col_ty { + AirType::Array(_, n) => { + Rvalue::Use(Operand::Const(AirConst::IntLiteral(*n as i64))) + 
} + AirType::Str => Rvalue::FieldAccess { + base: Operand::Copy(col_local), + field: "len".to_string(), + }, + _ => Rvalue::Call { + func: Callee::Named("__aelys_len".to_string()), + args: vec![Operand::Copy(col_local)], + }, + }; + self.emit( + AirStmtKind::Assign { + place: Place::Local(len_local), + rvalue: len_rvalue, + }, + None, + ); + + // Save scope so the iterator variable doesn't leak after the loop. + let scope_depth = self.locals_by_name.len(); + + let elem_air_ty = self.lower_type_from_infer(elem_type); + let elem_local = self.alloc_named_local(iterator, elem_air_ty, false, sp); + + let header_id = self.alloc_block_id(); + let body_id = self.alloc_block_id(); + let incr_id = self.alloc_block_id(); + let exit_id = self.alloc_block_id(); + + self.seal_block(AirTerminator::Goto(header_id)); + self.fixup_block_id_noop(header_id); + + let cond_local = self.alloc_temp(AirType::Bool); + self.emit( + AirStmtKind::Assign { + place: Place::Local(cond_local), + rvalue: Rvalue::BinaryOp( + BinOp::Lt, + Operand::Copy(idx_local), + Operand::Copy(len_local), + ), + }, + None, + ); + self.seal_block(AirTerminator::Branch { + cond: Operand::Copy(cond_local), + then_block: body_id, + else_block: exit_id, + }); + + self.fixup_block_id_noop(body_id); + self.emit( + AirStmtKind::Assign { + place: Place::Local(elem_local), + rvalue: Rvalue::Index { + base: Operand::Copy(col_local), + index: Operand::Copy(idx_local), + }, + }, + None, + ); + + self.loop_stack.push(super::LoopBlocks { + header: incr_id, + exit: exit_id, + }); + self.lower_stmt(body); + if !self.last_block_is_terminated() { + self.seal_block(AirTerminator::Goto(incr_id)); + } + self.loop_stack.pop(); + + self.fixup_block_id_noop(incr_id); + self.emit( + AirStmtKind::Assign { + place: Place::Local(idx_local), + rvalue: Rvalue::BinaryOp( + BinOp::Add, + Operand::Copy(idx_local), + Operand::Const(AirConst::IntLiteral(1)), + ), + }, + None, + ); + self.seal_block(AirTerminator::Goto(header_id)); + + 
self.fixup_block_id_noop(exit_id);
+        self.locals_by_name.truncate(scope_depth);
+    }
+}
+
+/// Returns true if the step expression is a compile-time negative constant.
+///
+/// Only two syntactic shapes are recognised: a negative integer literal, or
+/// unary negation applied directly to a positive integer literal. A zero
+/// literal and every non-literal expression return false.
+fn step_expr_is_negative(step: &TypedExpr) -> bool {
+    match &step.kind {
+        TypedExprKind::Int(v) => *v < 0,
+        TypedExprKind::Unary { op: UnaryOp::Neg, operand } => {
+            matches!(&operand.kind, TypedExprKind::Int(v) if *v > 0)
+        }
+        _ => false,
+    }
+}
+
+/// Returns true if the step expression is a compile-time positive constant.
+///
+/// Only two syntactic shapes are recognised: a positive integer literal, or
+/// unary negation applied directly to a negative integer literal. A zero
+/// literal and every non-literal expression return false.
+fn step_expr_is_positive(step: &TypedExpr) -> bool {
+    match &step.kind {
+        TypedExprKind::Int(v) => *v > 0,
+        TypedExprKind::Unary { op: UnaryOp::Neg, operand } => {
+            matches!(&operand.kind, TypedExprKind::Int(v) if *v < 0)
+        }
+        _ => false,
+    }
+}
diff --git a/air/src/lower/mod.rs b/air/src/lower/mod.rs
new file mode 100644
index 0000000..590ba56
--- /dev/null
+++ b/air/src/lower/mod.rs
@@ -0,0 +1,490 @@
+mod expr;
+mod loops;
+mod program;
+mod stmts;
+
+use crate::*;
+use aelys_sema::{InferType, TypedProgram};
+use aelys_syntax::BinaryOp;
+
+/// Lowers a typed program to AIR.
+///
+/// # Panics
+///
+/// Panics with the formatted list of lowering errors if lowering reports any.
+/// Use [`try_lower`] to receive the errors instead.
+pub fn lower(program: &TypedProgram) -> AirProgram {
+    try_lower(program).unwrap_or_else(|errors| panic!("{}", format_lowering_errors(&errors)))
+}
+
+/// Lowers a typed program to AIR, keeping the context's default GC mode.
+///
+/// # Errors
+///
+/// Returns every error message collected during lowering if there is at least one.
+pub fn try_lower(program: &TypedProgram) -> Result<AirProgram, Vec<String>> {
+    let mut cx = LoweringContext::new(program);
+    cx.lower_program();
+    cx.finish()
+}
+
+/// Lowers a typed program to AIR using `file_gc_mode` as the file-level GC mode.
+///
+/// # Panics
+///
+/// Panics with the formatted list of lowering errors if lowering reports any.
+/// Use [`try_lower_with_gc_mode`] to receive the errors instead.
+pub fn lower_with_gc_mode(program: &TypedProgram, file_gc_mode: GcMode) -> AirProgram {
+    try_lower_with_gc_mode(program, file_gc_mode)
+        .unwrap_or_else(|errors| panic!("{}", format_lowering_errors(&errors)))
+}
+
+/// Lowers a typed program to AIR using `file_gc_mode` as the file-level GC mode.
+///
+/// # Errors
+///
+/// Returns every error message collected during lowering if there is at least one.
+pub fn try_lower_with_gc_mode(
+    program: &TypedProgram,
+    file_gc_mode: GcMode,
+) -> Result<AirProgram, Vec<String>> {
+    let mut cx = LoweringContext::new(program);
+    cx.file_gc_mode = file_gc_mode;
+    cx.lower_program();
+    cx.finish()
+}
+
+pub(crate) struct LoweringContext<'a> {
+    pub(super) program: &'a TypedProgram,
+    pub(super) functions: Vec,
+    pub(super) structs: Vec,
+    pub(super) enums: Vec,
+    pub(super) globals: Vec,
+
pub(super) source_files: Vec, + pub(super) next_function_id: u32, + pub(super) next_local_id: u32, + pub(super) next_block_id: u32, + pub(super) file_gc_mode: GcMode, + pub(super) current_blocks: Vec, + pub(super) current_locals: Vec, + pub(super) current_params: Vec, + pub(super) current_stmts: Vec, + pub(super) locals_by_name: Vec<(String, LocalId)>, + pub(super) loop_stack: Vec, + pub(super) type_params_map: Vec<(String, TypeParamId)>, + pub(super) pending_block_id: Option, + pub(super) block_aliases: Vec<(u32, u32)>, + /// When inside a closure body, the local holding the env pointer (__env param). + /// Used to write back mutations to captured variables. + pub(super) closure_env_param: Option, + /// Names of variables captured from the enclosing scope (keys of the env struct). + pub(super) closure_captures: std::collections::HashSet, + /// collected compile errors from lowering + /// if non-empty after lowering completes, `finish()` returns them to the caller + pub(super) lowering_errors: Vec, +} + +pub(super) struct LoopBlocks { + pub(super) header: BlockId, + pub(super) exit: BlockId, +} + +impl<'a> LoweringContext<'a> { + fn new(program: &'a TypedProgram) -> Self { + Self { + program, + functions: Vec::new(), + structs: Vec::new(), + enums: Vec::new(), + globals: Vec::new(), + source_files: vec![program.source.name.clone()], + next_function_id: 0, + next_local_id: 0, + next_block_id: 0, + file_gc_mode: GcMode::Managed, + current_blocks: Vec::new(), + current_locals: Vec::new(), + current_params: Vec::new(), + current_stmts: Vec::new(), + locals_by_name: Vec::new(), + loop_stack: Vec::new(), + type_params_map: Vec::new(), + pending_block_id: None, + block_aliases: Vec::new(), + closure_env_param: None, + closure_captures: std::collections::HashSet::new(), + lowering_errors: Vec::new(), + } + } + + fn finish(self) -> Result> { + if !self.lowering_errors.is_empty() { + return Err(self.lowering_errors); + } + Ok(AirProgram { + functions: self.functions, + 
structs: self.structs, + enums: self.enums, + globals: self.globals, + source_files: self.source_files, + mono_instances: Vec::new(), + struct_sizes: std::collections::HashMap::new(), + }) + } + + pub(super) fn alloc_function_id(&mut self) -> FunctionId { + let id = FunctionId(self.next_function_id); + self.next_function_id += 1; + id + } + + pub(super) fn alloc_local_id(&mut self) -> LocalId { + let id = LocalId(self.next_local_id); + self.next_local_id += 1; + id + } + + pub(super) fn alloc_block_id(&mut self) -> BlockId { + let id = BlockId(self.next_block_id); + self.next_block_id += 1; + id + } + + pub(super) fn alloc_temp(&mut self, ty: AirType) -> LocalId { + let id = self.alloc_local_id(); + self.current_locals.push(AirLocal { + id, + ty, + name: None, + is_mut: false, + span: None, + }); + id + } + + // alloc_temp creates immutable locals ->> codegen uses a flat value_map that doesn't respect SSA dominance + // anything written from 2+ blocks needs an alloca. + pub(super) fn alloc_temp_mut(&mut self, ty: AirType) -> LocalId { + let id = self.alloc_local_id(); + self.current_locals.push(AirLocal { + id, + ty, + name: None, + is_mut: true, + span: None, + }); + id + } + + pub(super) fn alloc_named_local( + &mut self, + name: &str, + ty: AirType, + is_mut: bool, + span: Option, + ) -> LocalId { + let id = self.alloc_local_id(); + self.current_locals.push(AirLocal { + id, + ty, + name: Some(name.to_string()), + is_mut, + span, + }); + self.locals_by_name.push((name.to_string(), id)); + id + } + + pub(super) fn lookup_local(&self, name: &str) -> Option { + self.locals_by_name + .iter() + .rev() + .find(|(n, _)| n == name) + .map(|(_, id)| *id) + } + + pub(super) fn emit(&mut self, kind: AirStmtKind, span: Option) { + self.current_stmts.push(AirStmt { kind, span }); + } + + pub(super) fn seal_block(&mut self, terminator: AirTerminator) -> BlockId { + let id = self + .pending_block_id + .take() + .unwrap_or_else(|| self.alloc_block_id()); + 
self.current_blocks.push(AirBlock { + id, + stmts: std::mem::take(&mut self.current_stmts), + terminator, + }); + id + } + + pub(super) fn span(&self, s: &aelys_syntax::Span) -> Span { + Span { + file: 0, + lo: s.start as u32, + hi: s.end as u32, + } + } + + /// Alloc a temp, emit an Assign of rvalue into it, return Copy(tmp). + pub(super) fn emit_rvalue_to_temp( + &mut self, + ty: AirType, + rvalue: Rvalue, + sp: Option, + ) -> Operand { + let tmp = self.alloc_temp(ty); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue, + }, + sp, + ); + Operand::Copy(tmp) + } + + /// Extract a LocalId from an Operand, materializing a const to a temp if needed. + pub(super) fn operand_to_local(&mut self, op: Operand, ty: &AirType) -> LocalId { + match op { + Operand::Copy(id) | Operand::Move(id) => id, + Operand::Const(_) => { + let tmp = self.alloc_temp(ty.clone()); + self.emit( + AirStmtKind::Assign { + place: Place::Local(tmp), + rvalue: Rvalue::Use(op), + }, + None, + ); + tmp + } + } + } + + /// Report a compile error and continue lowering with a fallback value. + /// All errors are returned together at the end via `finish()`. 
+ pub(super) fn report_error(&mut self, message: String) { + self.lowering_errors.push(message); + } + + pub(super) fn lower_type_params(&mut self, type_params: &[String]) -> Vec { + type_params + .iter() + .enumerate() + .map(|(i, name)| { + let id = TypeParamId(i as u32); + self.type_params_map.push((name.clone(), id)); + id + }) + .collect() + } + + pub(super) fn lower_type_from_infer(&self, ty: &InferType) -> AirType { + match ty { + InferType::I8 => AirType::I8, + InferType::I16 => AirType::I16, + InferType::I32 => AirType::I32, + InferType::I64 => AirType::I64, + InferType::U8 => AirType::U8, + InferType::U16 => AirType::U16, + InferType::U32 => AirType::U32, + InferType::U64 => AirType::U64, + InferType::F32 => AirType::F32, + InferType::F64 => AirType::F64, + InferType::Bool => AirType::Bool, + InferType::String => AirType::Str, + InferType::Null => AirType::Ptr(Box::new(AirType::Void)), + InferType::Function { params, ret } => AirType::FnPtr { + params: params + .iter() + .map(|p| self.lower_type_from_infer(p)) + .collect(), + ret: Box::new(self.lower_type_from_infer(ret)), + conv: CallingConv::Aelys, + }, + InferType::Array(inner, Some(n)) => { + AirType::Array(Box::new(self.lower_type_from_infer(inner)), *n) + } + InferType::Array(inner, None) => { + AirType::Slice(Box::new(self.lower_type_from_infer(inner))) + } + InferType::Vec(inner) => AirType::Slice(Box::new(self.lower_type_from_infer(inner))), + // TODO: add support for InferType::Tuple in the backend + InferType::Tuple(_) => { + #[cfg(debug_assertions)] + eprintln!( + "[AIR] warning: InferType::Tuple reached lower_type_from_infer, \ + tuples are not yet supported in the LLVM backend" + ); + AirType::Opaque + } + // TODO: add support for InferType::Range in the backend + InferType::Range => { + #[cfg(debug_assertions)] + eprintln!( + "[AIR] warning: InferType::Range reached lower_type_from_infer, \ + ranges are not yet supported in the LLVM backend" + ); + AirType::Opaque + } + 
InferType::Struct(name) => { + if let Some((_, id)) = self.type_params_map.iter().find(|(n, _)| n == name) { + AirType::Param(*id) + } else { + AirType::Struct(name.clone()) + } + } + InferType::Enum(name, type_args) => { + if type_args.is_empty() { + AirType::Enum(name.clone()) + } else { + // When sema preserves concrete type args (e.g., Enum("Option", [I64])), + // pre-compute the mangled name so that the mono pass can use the local's + // type to disambiguate unit variant assignments. + let lowered_args: Vec = type_args + .iter() + .map(|a| self.lower_type_from_infer(a)) + .collect(); + // Only pre-mangle if all type args are fully concrete. + // Opaque/Void come from unresolved inference, and Param comes + // from generic function bodies — both would produce nonsensical + // mangled names that function monomorphization can't rewrite. + let all_concrete = lowered_args + .iter() + .all(|t| !matches!(t, AirType::Opaque | AirType::Void | AirType::Param(_))); + if all_concrete { + let suffix = lowered_args + .iter() + .map(|t| crate::mono::substitute::type_to_string(t)) + .collect::>() + .join("$"); + AirType::Enum(format!("__mono_{}_{}", name, suffix)) + } else { + AirType::Enum(name.clone()) + } + } + } + // A Var reaching lowering is always a compiler bug: finalize should have converted every Var to Dynamic before the AIR stage. + // Map to Opaque so the validation pass rejects it with a clear diagnostic. + InferType::Var(_id) => { + #[cfg(debug_assertions)] + eprintln!( + "[AIR] ICE: InferType::Var({}) leaked past finalization into lower_type_from_infer", + _id.0 + ); + AirType::Opaque + } + // Never (bottom type) represents unreachable code; map to Void. + InferType::Never => AirType::Void, + // Dynamic = sema's "gradual typing" fallback. For generic call results, monomorphization patches the type before codegen. 
+ // For anything else (error recovery, unresolved inference), Opaque survives past mono and the validation pass rejects it with a clear diagnostic + InferType::Dynamic => AirType::Opaque, + } + } + + /// Check that a stack array doesn't exceed the 1MB stack size threshold. + /// Reports a compile error if the array is too large (no longer panics) + pub(super) fn check_stack_array_size(&mut self, elem_ty: &AirType, n: u64) { + const MAX_STACK_BYTES: u64 = 1024 * 1024; // 1 MB + let elem_size = self.stack_array_elem_size(elem_ty) as u64; + let total = n.saturating_mul(elem_size); + if total > MAX_STACK_BYTES { + self.report_error(format!( + "stack array too large: [{}; {}] = {} bytes (max {} bytes). \ + Consider using a smaller size or a heap-allocated collection.", + crate::print::fmt_type(elem_ty), + n, + total, + MAX_STACK_BYTES, + )); + } + } + + fn stack_array_elem_size(&self, elem_ty: &AirType) -> u32 { + let mut probe = AirProgram { + functions: vec![AirFunction { + id: FunctionId(0), + name: "__stack_size_probe".to_string(), + gc_mode: GcMode::Managed, + type_params: vec![], + params: vec![], + ret_ty: AirType::Void, + locals: vec![AirLocal { + id: LocalId(0), + ty: elem_ty.clone(), + name: Some("__probe".to_string()), + is_mut: false, + span: None, + }], + blocks: vec![], + is_extern: true, + calling_conv: CallingConv::Aelys, + attributes: FunctionAttribs { + inline: InlineHint::Default, + no_gc: false, + no_unwind: false, + cold: false, + }, + span: None, + }], + structs: self.structs.clone(), + enums: self.enums.clone(), + globals: vec![], + source_files: vec![], + mono_instances: vec![], + struct_sizes: std::collections::HashMap::new(), + }; + + // `layout_of` is context-free and underestimates data enums as 4 bytes. + // Build a tiny AIR probe so mono + layout can recover the real aggregate size. 
+ probe = crate::mono::monomorphize(probe).unwrap(); + let _ = crate::layout::compute_layouts(&mut probe); + crate::layout::resolved_layout(elem_ty, &probe.struct_sizes).size + } + + pub(super) fn gc_mode_for_function(&self, func: &aelys_sema::TypedFunction) -> GcMode { + if func.decorators.iter().any(|d| d.name == "no_gc") { + GcMode::Manual + } else { + self.file_gc_mode + } + } +} + +fn format_lowering_errors(errors: &[String]) -> String { + let joined = errors + .iter() + .enumerate() + .map(|(i, e)| format!(" {}. {}", i + 1, e)) + .collect::>() + .join("\n"); + format!( + "AIR lowering failed with {} error(s):\n{}", + errors.len(), + joined + ) +} + +pub(crate) fn infer_to_int_size(ty: &InferType) -> AirIntSize { + match ty { + InferType::I8 => AirIntSize::I8, + InferType::I16 => AirIntSize::I16, + InferType::I32 => AirIntSize::I32, + InferType::I64 => AirIntSize::I64, + InferType::U8 => AirIntSize::U8, + InferType::U16 => AirIntSize::U16, + InferType::U32 => AirIntSize::U32, + InferType::U64 => AirIntSize::U64, + _ => AirIntSize::I64, + } +} + +fn lower_binop(op: &BinaryOp) -> BinOp { + match op { + BinaryOp::Add => BinOp::Add, + BinaryOp::Sub => BinOp::Sub, + BinaryOp::Mul => BinOp::Mul, + BinaryOp::Div => BinOp::Div, + BinaryOp::Mod => BinOp::Rem, + BinaryOp::Eq => BinOp::Eq, + BinaryOp::Ne => BinOp::Ne, + BinaryOp::Lt => BinOp::Lt, + BinaryOp::Le => BinOp::Le, + BinaryOp::Gt => BinOp::Gt, + BinaryOp::Ge => BinOp::Ge, + BinaryOp::Shl => BinOp::Shl, + BinaryOp::Shr => BinOp::Shr, + BinaryOp::BitAnd => BinOp::BitAnd, + BinaryOp::BitOr => BinOp::BitOr, + BinaryOp::BitXor => BinOp::BitXor, + } +} + +fn lower_unop(op: &aelys_syntax::UnaryOp) -> UnOp { + match op { + aelys_syntax::UnaryOp::Neg => UnOp::Neg, + aelys_syntax::UnaryOp::Not => UnOp::Not, + aelys_syntax::UnaryOp::BitNot => UnOp::BitNot, + } +} diff --git a/air/src/lower/program.rs b/air/src/lower/program.rs new file mode 100644 index 0000000..84ee0e0 --- /dev/null +++ b/air/src/lower/program.rs @@ -0,0 
+1,584 @@ +use super::LoweringContext; +use crate::*; +use aelys_sema::{InferType, TypedFunction, TypedParam, TypedStmtKind}; + +impl<'a> LoweringContext<'a> { + pub(super) fn lower_program(&mut self) { + for stmt in &self.program.stmts { + if let TypedStmtKind::StructDecl { + name, + type_params, + fields, + } = &stmt.kind + { + if type_params.is_empty() { + self.lower_struct_decl(name, type_params, fields, &stmt.span); + } + } + } + + // Collect enum definitions + for stmt in &self.program.stmts { + if let TypedStmtKind::EnumDecl { + name, + type_params, + variants, + } = &stmt.kind + { + self.lower_enum_decl(name, type_params, variants, &stmt.span); + } + } + + let stmts: Vec<_> = self.program.stmts.clone(); + for stmt in &stmts { + match &stmt.kind { + TypedStmtKind::StructDecl { .. } + | TypedStmtKind::EnumDecl { .. } + | TypedStmtKind::Function(_) => {} + _ => self.lower_toplevel_stmt(stmt), + } + } + + for stmt in &stmts { + if let TypedStmtKind::Function(func) = &stmt.kind { + self.lower_function(func); + } + } + } + + fn lower_struct_decl( + &mut self, + name: &str, + type_params: &[String], + fields: &[(String, InferType)], + span: &aelys_syntax::Span, + ) { + let air_type_params = self.lower_type_params(type_params); + let air_fields = fields + .iter() + .map(|(fname, fty)| AirStructField { + name: fname.clone(), + ty: self.lower_type_from_infer(fty), + offset: None, + }) + .collect(); + self.structs.push(AirStructDef { + name: name.to_string(), + type_params: air_type_params, + fields: air_fields, + is_closure_env: false, + span: Some(self.span(span)), + }); + self.type_params_map.clear(); + } + + fn lower_enum_decl( + &mut self, + name: &str, + type_params: &[String], + variants: &[(String, u32, Vec)], + span: &aelys_syntax::Span, + ) { + let air_type_params = self.lower_type_params(type_params); + let air_variants = variants + .iter() + .map(|(vname, vtag, data)| AirEnumVariant { + name: vname.clone(), + tag: *vtag, + payload: data + .iter() + 
.map(|ty| self.lower_type_from_infer(ty)) + .collect(), + }) + .collect(); + self.enums.push(AirEnumDef { + name: name.to_string(), + type_params: air_type_params, + variants: air_variants, + span: Some(self.span(span)), + }); + self.type_params_map.clear(); + } + + pub(super) fn lower_function(&mut self, func: &TypedFunction) { + let saved_locals = std::mem::take(&mut self.current_locals); + let saved_params = std::mem::take(&mut self.current_params); + let saved_blocks = std::mem::take(&mut self.current_blocks); + let saved_stmts = std::mem::take(&mut self.current_stmts); + let saved_names = std::mem::take(&mut self.locals_by_name); + let saved_aliases = std::mem::take(&mut self.block_aliases); + let saved_pending = self.pending_block_id.take(); + let saved_next_local = self.next_local_id; + let saved_next_block = self.next_block_id; + self.next_local_id = 0; + self.next_block_id = 0; + + let func_id = self.alloc_function_id(); + let gc_mode = self.gc_mode_for_function(func); + let captures = self.runtime_captures(&func.captures); + + if !captures.is_empty() { + self.lower_closure(func, &captures, func_id, gc_mode); + } else { + self.lower_plain_function(func, func_id, gc_mode); + } + + self.current_locals = saved_locals; + self.current_params = saved_params; + self.current_blocks = saved_blocks; + self.current_stmts = saved_stmts; + self.locals_by_name = saved_names; + self.block_aliases = saved_aliases; + self.pending_block_id = saved_pending; + self.next_local_id = saved_next_local; + self.next_block_id = saved_next_block; + } + + fn lower_plain_function(&mut self, func: &TypedFunction, func_id: FunctionId, gc_mode: GcMode) { + let type_params = self.lower_type_params(&func.type_params); + let params = self.lower_params(&func.params); + let mut ret_ty = self.lower_type_from_infer(&func.return_type); + if ret_ty == AirType::Opaque { + self.report_error(format!( + "function `{}` has unresolved return type (Opaque); \ + treating as void — this indicates a type 
inference failure", + func.name + )); + ret_ty = AirType::Void; + } + // InferType::Null is used by sema for both the null literal *and* the implicit void return. + // In lower_type_from_infer it becomes Ptr(Void) (which is correct for the null literal), but as a return type it means void + if ret_ty == AirType::Ptr(Box::new(AirType::Void)) { + ret_ty = AirType::Void; + } + + self.lower_body(&func.body); + self.finalize_function_body(); + self.resolve_block_aliases(); + + let air_func = AirFunction { + id: func_id, + name: func.name.clone(), + gc_mode, + type_params, + params, + ret_ty, + locals: std::mem::take(&mut self.current_locals), + blocks: std::mem::take(&mut self.current_blocks), + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: self.func_attribs(func), + span: Some(self.span(&func.span)), + }; + self.functions.push(air_func); + self.type_params_map.clear(); + } + + fn lower_closure( + &mut self, + func: &TypedFunction, + captures: &[(String, InferType)], + func_id: FunctionId, + gc_mode: GcMode, + ) { + let type_params = self.lower_type_params(&func.type_params); + + let env_name = format!("__closure_env_{}", func.name); + let env_fields: Vec = captures + .iter() + .map(|(name, ty)| AirStructField { + name: name.clone(), + ty: self.lower_type_from_infer(ty), + offset: None, + }) + .collect(); + + self.structs.push(AirStructDef { + name: env_name.clone(), + type_params: Vec::new(), + fields: env_fields, + is_closure_env: true, + span: Some(self.span(&func.span)), + }); + + let env_param_id = self.alloc_local_id(); + let env_ty = AirType::Ptr(Box::new(AirType::Struct(env_name.clone()))); + self.current_params.push(AirParam { + id: env_param_id, + ty: env_ty.clone(), + name: "__env".to_string(), + span: Some(self.span(&func.span)), + }); + + for (cap_name, cap_ty) in captures { + let local_id = + self.alloc_named_local(cap_name, self.lower_type_from_infer(cap_ty), false, None); + self.emit( + AirStmtKind::Assign { + place: 
Place::Local(local_id), + rvalue: Rvalue::FieldAccess { + base: Operand::Copy(env_param_id), + field: cap_name.clone(), + }, + }, + None, + ); + } + + // Track env param and captured names so assignments to captures write back + let saved_env_param = self.closure_env_param.replace(env_param_id); + let saved_captures = std::mem::replace( + &mut self.closure_captures, + captures.iter().map(|(n, _)| n.clone()).collect(), + ); + + let user_params = self.lower_params(&func.params); + let mut ret_ty = self.lower_type_from_infer(&func.return_type); + if ret_ty == AirType::Opaque { + self.report_error(format!( + "closure `{}` has unresolved return type (Opaque); \ + treating as void — this indicates a type inference failure", + func.name + )); + ret_ty = AirType::Void; + } + if ret_ty == AirType::Ptr(Box::new(AirType::Void)) { + ret_ty = AirType::Void; + } + + self.lower_body(&func.body); + self.finalize_function_body(); + self.resolve_block_aliases(); + + // Restore outer closure context (supports nested closures) + self.closure_env_param = saved_env_param; + self.closure_captures = saved_captures; + + let mut all_params = vec![self.current_params.remove(0)]; + all_params.extend(user_params); + + let air_func = AirFunction { + id: func_id, + name: func.name.clone(), + gc_mode, + type_params, + params: all_params, + ret_ty, + locals: std::mem::take(&mut self.current_locals), + blocks: std::mem::take(&mut self.current_blocks), + is_extern: false, + calling_conv: CallingConv::Aelys, + attributes: self.func_attribs(func), + span: Some(self.span(&func.span)), + }; + self.functions.push(air_func); + self.type_params_map.clear(); + } + + /// Lower a function always taking the closure path (with __env param), + /// regardless of whether it has captures. 
This ensures every lambda gets an + /// __env parameter, so the calling convention is uniform: a call site that + /// receives `fn(i64) -> i64` can always pass env as the first argument + /// without caring whether it's a capturing closure or a bare lambda. + pub(super) fn lower_function_as_closure(&mut self, func: &TypedFunction) { + let saved_locals = std::mem::take(&mut self.current_locals); + let saved_params = std::mem::take(&mut self.current_params); + let saved_blocks = std::mem::take(&mut self.current_blocks); + let saved_stmts = std::mem::take(&mut self.current_stmts); + let saved_names = std::mem::take(&mut self.locals_by_name); + let saved_aliases = std::mem::take(&mut self.block_aliases); + let saved_pending = self.pending_block_id.take(); + let saved_next_local = self.next_local_id; + let saved_next_block = self.next_block_id; + self.next_local_id = 0; + self.next_block_id = 0; + + let func_id = self.alloc_function_id(); + let gc_mode = self.gc_mode_for_function(func); + let captures = self.runtime_captures(&func.captures); + + // Always take the closure path + self.lower_closure(func, &captures, func_id, gc_mode); + + self.current_locals = saved_locals; + self.current_params = saved_params; + self.current_blocks = saved_blocks; + self.current_stmts = saved_stmts; + self.locals_by_name = saved_names; + self.block_aliases = saved_aliases; + self.pending_block_id = saved_pending; + self.next_local_id = saved_next_local; + self.next_block_id = saved_next_block; + } + + pub(super) fn runtime_captures(&self, captures: &[(String, InferType)]) -> Vec<(String, InferType)> { + // File-scope lets live in global storage, not in closure environments. 
+ captures + .iter() + .filter(|(name, _)| !self.globals.iter().any(|global| global.name == *name)) + .cloned() + .collect() + } + + pub(super) fn lower_params(&mut self, params: &[TypedParam]) -> Vec { + params + .iter() + .map(|p| { + let ty = self.lower_type_from_infer(&p.ty); + let id = self.alloc_named_local( + &p.name, + ty.clone(), + p.mutable, + Some(self.span(&p.span)), + ); + AirParam { + id, + ty, + name: p.name.clone(), + span: Some(self.span(&p.span)), + } + }) + .collect() + } + + pub(super) fn func_attribs(&self, func: &TypedFunction) -> FunctionAttribs { + let inline = if func.decorators.iter().any(|d| d.name == "inline_always") { + InlineHint::Always + } else if func.decorators.iter().any(|d| d.name == "inline_never") { + InlineHint::Never + } else { + InlineHint::Default + }; + FunctionAttribs { + inline, + no_gc: func.decorators.iter().any(|d| d.name == "no_gc"), + no_unwind: false, + cold: func.decorators.iter().any(|d| d.name == "cold"), + } + } + + fn lower_toplevel_stmt(&mut self, stmt: &aelys_sema::TypedStmt) { + if let TypedStmtKind::Let { + name, + initializer, + var_type, + .. + } = &stmt.kind + { + let ty = self.lower_type_from_infer(var_type); + let init = self.try_global_const_expr(initializer); + if let Some(message) = self.global_initializer_error(name, &ty, init.as_ref()) { + self.report_error(message); + } + self.globals.push(AirGlobal { + name: name.clone(), + ty, + init, + gc_mode: self.file_gc_mode, + span: Some(self.span(&stmt.span)), + }); + } + } + + /// Try to evaluate an expression as a compile-time constant for a global initializer. 
+ /// + /// Extends `try_const_expr` (which is `&self`) to also handle: + /// - Non-capturing lambdas: emitted as closure-convention functions, returned as `FnRef` + /// - Compound types (Array, Struct) whose elements may themselves be lambdas + pub(super) fn try_global_const_expr(&mut self, expr: &aelys_sema::TypedExpr) -> Option { + use aelys_sema::TypedExprKind; + match &expr.kind { + // Peel Lambda wrapper + TypedExprKind::Lambda(inner) => self.try_global_const_expr(inner), + // Non-capturing lambda → emit as a named closure-convention function + TypedExprKind::LambdaInner { params, return_type, body, captures } => { + let runtime_caps = self.runtime_captures(captures); + if !runtime_caps.is_empty() { + return None; // capturing lambdas cannot be global constants + } + let lambda_name = format!("__lambda_{}", self.next_function_id); + let fake_func = TypedFunction { + name: lambda_name.clone(), + type_params: Vec::new(), + params: params.clone(), + return_type: return_type.clone(), + body: body.clone(), + decorators: Vec::new(), + is_pub: false, + span: expr.span, + captures: Vec::new(), + }; + self.lower_function_as_closure(&fake_func); + Some(AirConst::FnRef(lambda_name)) + } + // Array literals may contain lambdas as elements + TypedExprKind::ArrayLiteral { elements } => { + let elements = elements.clone(); + let mut consts = Vec::with_capacity(elements.len()); + for e in &elements { + consts.push(self.try_global_const_expr(e)?); + } + Some(AirConst::Array(consts)) + } + // Struct literals may contain lambdas as field values + TypedExprKind::StructLiteral { name, fields } => { + let name = name.clone(); + let fields = fields.clone(); + let mut field_consts = Vec::with_capacity(fields.len()); + for (fname, fexpr) in &fields { + field_consts.push((fname.clone(), self.try_global_const_expr(fexpr)?)); + } + Some(AirConst::Struct { name, fields: field_consts }) + } + // Everything else delegates to the immutable version + _ => self.try_const_expr(expr), + } + } + 
+ fn global_initializer_error( + &self, + name: &str, + ty: &AirType, + init: Option<&AirConst>, + ) -> Option { + let Some(init) = init else { + return Some(format!( + "file-scope let '{name}' requires a compile-time constant initializer" + )); + }; + if matches!(ty, AirType::Enum(_)) && Self::enum_payload_needs_runtime_storage(init) { + // Data enum globals are serialized as raw constant bytes today. + // Payloads that smuggle runtime addresses (string/fnptr) must fail here + // instead of drifting into a later backend-only error. + return Some(format!( + "file-scope let '{name}' uses enum payload values with runtime-backed storage (`str`/`fnptr`), which globals cannot serialize yet" + )); + } + None + } + + fn enum_payload_needs_runtime_storage(init: &AirConst) -> bool { + match init { + AirConst::Str(_) | AirConst::FnRef(_) => true, + AirConst::Enum { payload, .. } => payload + .iter() + .any(Self::enum_payload_needs_runtime_storage), + _ => false, + } + } + + fn resolve_const_global_alias(&self, name: &str) -> Option { + let mut current = name.to_string(); + let mut seen = std::collections::HashSet::new(); + + loop { + if !seen.insert(current.clone()) { + return None; + } + + let global = self.globals.iter().find(|global| global.name == current)?; + let init = global.init.as_ref()?.clone(); + match init { + // Follow fnptr aliases through prior globals until we reach the real symbol. 
+ AirConst::FnRef(target) if self.globals.iter().any(|global| global.name == target) => { + current = target; + } + other => return Some(other), + } + } + } + + pub(super) fn try_const_expr(&self, expr: &aelys_sema::TypedExpr) -> Option { + use aelys_sema::TypedExprKind; + match &expr.kind { + TypedExprKind::Int(v) => { + if expr.ty.is_integer() { + Some(AirConst::Int(*v, super::infer_to_int_size(&expr.ty))) + } else { + Some(AirConst::IntLiteral(*v)) + } + } + TypedExprKind::Float(v) => { + let size = if matches!(expr.ty, InferType::F32) { + AirFloatSize::F32 + } else { + AirFloatSize::F64 + }; + Some(AirConst::Float(*v, size)) + } + TypedExprKind::Bool(v) => Some(AirConst::Bool(*v)), + TypedExprKind::String(v) => Some(AirConst::Str(v.clone())), + TypedExprKind::Null => Some(AirConst::Null), + TypedExprKind::Identifier(name) => { + if let Some(existing) = self.resolve_const_global_alias(name) { + Some(existing) + } else if matches!(expr.ty, InferType::Function { .. }) { + Some(AirConst::FnRef(name.clone())) + } else { + None + } + } + TypedExprKind::EnumVariant { + enum_name, + variant, + tag, + args, + } + if args.is_empty() + && self + .program + .type_table + .get_enum(enum_name) + .is_some_and(|def| def + .variants + .iter() + .any(|candidate| candidate.name == *variant && candidate.data.is_empty())) => + { + // Unit variants are constant at AIR level. Simple enums stay bare + // i32 tags; data enums rebuild the aggregate from the tag later. + Some(AirConst::Int(*tag as i64, AirIntSize::I32)) + } + TypedExprKind::EnumVariant { tag, args, .. } => { + let payload = args + .iter() + .map(|arg| self.try_const_expr(arg)) + .collect::>>()?; + // Globals skip EnumInit AIR, so payload-bearing enum constants need + // to carry the concrete monomorphized enum name here. 
+ let AirType::Enum(enum_name) = self.lower_type_from_infer(&expr.ty) else { + return None; + }; + Some(AirConst::Enum { + enum_name, + tag: *tag, + payload, + }) + } + TypedExprKind::ArrayLiteral { elements } => { + let consts: Option> = + elements.iter().map(|e| self.try_const_expr(e)).collect(); + consts.map(AirConst::Array) + } + TypedExprKind::ArraySized { size, fill_value } => { + // [val; N] is constant if val is constant and N is a literal + let n = if let TypedExprKind::Int(n) = &size.kind { + Some(*n as usize) + } else { + None + }?; + let fill = fill_value.as_ref().and_then(|fv| self.try_const_expr(fv))?; + Some(AirConst::Array(vec![fill; n])) + } + TypedExprKind::StructLiteral { name, fields } => { + let field_consts: Option> = fields + .iter() + .map(|(fname, fexpr)| { + self.try_const_expr(fexpr).map(|c| (fname.clone(), c)) + }) + .collect(); + field_consts.map(|fields| AirConst::Struct { name: name.clone(), fields }) + } + _ => None, + } + } +} diff --git a/air/src/lower/stmts.rs b/air/src/lower/stmts.rs new file mode 100644 index 0000000..2f3b99a --- /dev/null +++ b/air/src/lower/stmts.rs @@ -0,0 +1,349 @@ +use super::LoweringContext; +use crate::*; +use aelys_sema::{InferType, TypedExprKind, TypedStmt, TypedStmtKind}; + +impl<'a> LoweringContext<'a> { + pub(super) fn lower_body(&mut self, stmts: &[TypedStmt]) { + // Save the current scope depth so inner `let` bindings don't leak out. 
+ let scope_depth = self.locals_by_name.len(); + for stmt in stmts { + self.lower_stmt(stmt); + } + self.locals_by_name.truncate(scope_depth); + } + + pub(super) fn finalize_function_body(&mut self) { + // seal any pending block (for example loop exit blocks) or unsealed statements with implicit return + if self.pending_block_id.is_some() + || (self.current_stmts.is_empty() && self.current_blocks.is_empty()) + || !self.current_stmts.is_empty() + { + self.seal_block(AirTerminator::Return(None)); + } + } + + pub(super) fn lower_stmt(&mut self, stmt: &TypedStmt) { + let sp = Some(self.span(&stmt.span)); + match &stmt.kind { + TypedStmtKind::Expression(expr) => { + self.lower_expr_discard(expr); + } + TypedStmtKind::Let { + name, + mutable, + initializer, + var_type, + .. + } => { + let ty = self.lower_type_from_infer(var_type); + // Optimization: for array initializers, emit stores directly to the named local + // instead of going through a temp + copy + if matches!(ty, AirType::Array(_, _)) { + match &initializer.kind { + TypedExprKind::ArrayLiteral { elements, .. } => { + // Evaluate all elements before registering the name so that + // `let arr = [arr[0], 1, 2]` reads the *outer* arr, not itself. + let elem_ops: Vec = + elements.iter().map(|e| self.lower_expr(e)).collect(); + let local = self.alloc_named_local(name, ty, true, sp); + for (i, elem_op) in elem_ops.into_iter().enumerate() { + self.emit( + AirStmtKind::Assign { + place: Place::Index( + local, + Operand::Const(AirConst::IntLiteral(i as i64)), + ), + rvalue: Rvalue::Use(elem_op), + }, + sp, + ); + } + return; + } + TypedExprKind::ArraySized { + size, fill_value, .. 
+ } => { + let n = match &size.kind { + TypedExprKind::Int(v) => *v as u64, + _ => { + self.report_error( + "unsupported non-constant array size: \ + ArraySized requires a constant integer size expression" + .to_string(), + ); + 0 + } + }; + let elem_air_ty = match var_type { + InferType::Array(inner, _) => self.lower_type_from_infer(inner), + _ => AirType::I64, + }; + self.check_stack_array_size(&elem_air_ty, n); + // Evaluate fill value before registering the name. + let fill_op = if let Some(fv) = fill_value { + self.lower_expr(fv) + } else { + let elem_ty = match var_type { + InferType::Array(inner, _) => self.lower_type_from_infer(inner), + _ => AirType::I64, + }; + Operand::Const(AirConst::ZeroInit(elem_ty)) + }; + let local = self.alloc_named_local(name, ty, true, sp); + for i in 0..n { + self.emit( + AirStmtKind::Assign { + place: Place::Index( + local, + Operand::Const(AirConst::IntLiteral(i as i64)), + ), + rvalue: Rvalue::Use(fill_op.clone()), + }, + sp, + ); + } + return; + } + _ => {} + } + } + // Evaluate the initializer before registering the name so that + // `let x = x + 1` reads the *outer* x, not the new binding. 
+ let operand = self.lower_expr(initializer); + let local = self.alloc_named_local(name, ty, *mutable, sp); + self.emit( + AirStmtKind::Assign { + place: Place::Local(local), + rvalue: Rvalue::Use(operand), + }, + sp, + ); + } + TypedStmtKind::Block(stmts) => { + self.lower_body(stmts); + } + TypedStmtKind::If { + condition, + then_branch, + else_branch, + } => { + self.lower_if(condition, then_branch, else_branch.as_deref(), sp); + } + TypedStmtKind::While { condition, body } => { + self.lower_while(condition, body, sp); + } + TypedStmtKind::For { + iterator, + start, + end, + inclusive, + step, + body, + } => { + self.lower_for(iterator, start, end, *inclusive, step.as_ref(), body); + } + TypedStmtKind::ForEach { + iterator, + iterable, + elem_type, + body, + } => { + self.lower_foreach(iterator, iterable, elem_type, body, sp); + } + TypedStmtKind::Return(val) => { + if let Some(e) = val { + let ret_ty = self.lower_type_from_infer(&e.ty); + // opaque means the return type is unresolved Dynamic (e.g. an implicit return of a print/println call). + // lower the expression for side effects only and emit a void return. + if matches!(ret_ty, AirType::Opaque) { + self.lower_expr_discard(e); + self.seal_block(AirTerminator::Return(None)); + return; + } + } + let operand = val.as_ref().map(|e| self.lower_expr(e)); + self.seal_block(AirTerminator::Return(operand)); + } + TypedStmtKind::Break => { + if let Some(loop_ctx) = self.loop_stack.last() { + let exit = loop_ctx.exit; + self.seal_block(AirTerminator::Goto(exit)); + } else { + // break outside loop: sema should have rejected this, but + // seal the block to prevent malformed AIR during error recovery. 
+ self.report_error("break statement outside of loop".to_string()); + self.seal_block(AirTerminator::Unreachable); + } + } + TypedStmtKind::Continue => { + if let Some(loop_ctx) = self.loop_stack.last() { + let header = loop_ctx.header; + self.seal_block(AirTerminator::Goto(header)); + } else { + self.report_error("continue statement outside of loop".to_string()); + self.seal_block(AirTerminator::Unreachable); + } + } + TypedStmtKind::Function(func) => { + self.lower_function(func); + } + TypedStmtKind::Needs(_) + | TypedStmtKind::StructDecl { .. } + | TypedStmtKind::EnumDecl { .. } => {} + } + } + + // control flow desugaring + pub(super) fn lower_if( + &mut self, + condition: &aelys_sema::TypedExpr, + then_branch: &TypedStmt, + else_branch: Option<&TypedStmt>, + _sp: Option, + ) { + let cond = self.lower_expr(condition); + let then_id = self.alloc_block_id(); + let else_id = self.alloc_block_id(); + let merge_id = self.alloc_block_id(); + + self.seal_block(AirTerminator::Branch { + cond, + then_block: then_id, + else_block: if else_branch.is_some() { + else_id + } else { + merge_id + }, + }); + + self.fixup_block_id_noop(then_id); + self.lower_stmt(then_branch); + if !self.last_block_is_terminated() { + self.seal_block(AirTerminator::Goto(merge_id)); + } + + if let Some(else_br) = else_branch { + self.fixup_block_id_noop(else_id); + self.lower_stmt(else_br); + if !self.last_block_is_terminated() { + self.seal_block(AirTerminator::Goto(merge_id)); + } + } + + self.fixup_block_id_noop(merge_id); + } + + pub(super) fn lower_while( + &mut self, + condition: &aelys_sema::TypedExpr, + body: &TypedStmt, + _sp: Option, + ) { + let header_id = self.alloc_block_id(); + let body_id = self.alloc_block_id(); + let exit_id = self.alloc_block_id(); + + self.seal_block(AirTerminator::Goto(header_id)); + + self.fixup_block_id_noop(header_id); + let cond = self.lower_expr(condition); + self.seal_block(AirTerminator::Branch { + cond, + then_block: body_id, + else_block: exit_id, + 
}); + + self.loop_stack.push(super::LoopBlocks { + header: header_id, + exit: exit_id, + }); + self.fixup_block_id_noop(body_id); + self.lower_stmt(body); + if !self.last_block_is_terminated() { + self.seal_block(AirTerminator::Goto(header_id)); + } + self.loop_stack.pop(); + + self.fixup_block_id_noop(exit_id); + } + + /// the old fixup_block_id(X) renamed the last sealed block to X + /// With nested control flow that creates multiple blocks, the last block is + /// some inner merge, not the branch entry. This nuked entire loop bodies. + /// Now we set the pending id *before* lowering so the first seal_block picks it up. + pub(super) fn fixup_block_id_noop(&mut self, target: BlockId) { + if let Some(old) = self.pending_block_id { + if old != target { + self.block_aliases.push((old.0, target.0)); + } + } + self.pending_block_id = Some(target); + } + + pub(super) fn resolve_block_aliases(&mut self) { + if self.block_aliases.is_empty() { + return; + } + let resolve = |id: &mut BlockId, aliases: &[(u32, u32)]| { + let mut current = id.0; + for _ in 0..aliases.len() { + if let Some(&(_, to)) = aliases.iter().find(|(from, _)| *from == current) { + current = to; + } else { + break; + } + } + *id = BlockId(current); + }; + for block in &mut self.current_blocks { + let aliases = &self.block_aliases; + match &mut block.terminator { + AirTerminator::Goto(id) => resolve(id, aliases), + AirTerminator::Branch { + then_block, + else_block, + .. + } => { + resolve(then_block, aliases); + resolve(else_block, aliases); + } + AirTerminator::Switch { + targets, default, .. + } => { + for (_, id) in targets { + resolve(id, aliases); + } + resolve(default, aliases); + } + AirTerminator::Invoke { normal, unwind, .. } => { + resolve(normal, aliases); + resolve(unwind, aliases); + } + AirTerminator::Return(_) + | AirTerminator::Unreachable + | AirTerminator::Unwind + | AirTerminator::Panic { .. 
} => {} + } + } + self.block_aliases.clear(); + } + + pub(super) fn last_block_is_terminated(&self) -> bool { + // A pending (unsealed) block is never "terminated" — even if the last + // *sealed* block happens to have a definitive terminator (e.g. the + // default arm of a match has `unreachable`). Returning true here + // would cause the while-loop continuation to skip the `Goto(header)` + // seal, leaving the pending merge block aliased to the loop exit. + if self.pending_block_id.is_some() { + return false; + } + // a block is terminated if it has a terminator than Goto + // Goto is a fallthrough to a merge block, not a definitive exit + // other terminator return, unreachable, branch etc are definitive exits. + self.current_stmts.is_empty() + && self + .current_blocks + .last() + .is_some_and(|b| !matches!(b.terminator, AirTerminator::Goto(_))) + } +} diff --git a/air/src/mono.rs b/air/src/mono.rs deleted file mode 100644 index b4d8c2d..0000000 --- a/air/src/mono.rs +++ /dev/null @@ -1,468 +0,0 @@ -use crate::*; -use std::collections::{HashMap, HashSet}; - -type MangledMap<'a> = HashMap<&'a str, Vec<(&'a (String, Vec), &'a String)>>; - -pub fn monomorphize(mut program: AirProgram) -> AirProgram { - let mut ctx = MonoContext::new(&program); - ctx.collect_mono_requests(&program); - ctx.instantiate(&mut program); - ctx.rewrite_call_sites(&mut program); - program.functions.retain(|f| f.type_params.is_empty()); - program -} - -struct MonoContext { - generic_functions: HashMap, - requests: Vec, - instantiated: HashMap<(String, Vec), String>, - next_function_id: u32, -} - -struct MonoRequest { - function_name: String, - type_args: Vec, -} - -impl MonoContext { - fn new(program: &AirProgram) -> Self { - let generic_functions: HashMap = program - .functions - .iter() - .enumerate() - .filter(|(_, f)| !f.type_params.is_empty()) - .map(|(i, f)| (f.name.clone(), i)) - .collect(); - - Self { - generic_functions, - requests: Vec::new(), - instantiated: HashMap::new(), - 
next_function_id: program.functions.len() as u32, - } - } - - fn collect_mono_requests(&mut self, program: &AirProgram) { - let generic_names: HashSet = self.generic_functions.keys().cloned().collect(); - - for func in &program.functions { - if func.type_params.is_empty() { - self.collect_from_function(func, program, &generic_names); - } - } - } - - fn collect_from_function( - &mut self, - func: &AirFunction, - program: &AirProgram, - generic_names: &HashSet, - ) { - for block in &func.blocks { - for stmt in &block.stmts { - self.collect_from_stmt(stmt, func, program, generic_names); - } - self.collect_from_terminator(&block.terminator, func, program, generic_names); - } - } - - fn collect_from_stmt( - &mut self, - stmt: &AirStmt, - caller: &AirFunction, - program: &AirProgram, - generic_names: &HashSet, - ) { - match &stmt.kind { - AirStmtKind::Assign { - rvalue: Rvalue::Call { func: callee, args }, - .. - } => { - self.try_collect(callee, args, caller, program, generic_names); - } - AirStmtKind::CallVoid { func: callee, args } => { - self.try_collect(callee, args, caller, program, generic_names); - } - _ => {} - } - } - - fn collect_from_terminator( - &mut self, - term: &AirTerminator, - caller: &AirFunction, - program: &AirProgram, - generic_names: &HashSet, - ) { - if let AirTerminator::Invoke { - func: callee, args, .. 
- } = term - { - self.try_collect(callee, args, caller, program, generic_names); - } - } - - fn try_collect( - &mut self, - callee: &Callee, - args: &[Operand], - caller: &AirFunction, - program: &AirProgram, - generic_names: &HashSet, - ) { - let name = match callee { - Callee::Named(n) if generic_names.contains(n) => n, - _ => return, - }; - - let func_idx = self.generic_functions[name]; - let generic_func = &program.functions[func_idx]; - - if let Some(type_args) = self.infer_type_args(generic_func, args, caller) { - let key = (name.clone(), self.type_args_key(&type_args)); - if !self.instantiated.contains_key(&key) { - self.requests.push(MonoRequest { - function_name: name.clone(), - type_args, - }); - } - } - } - - fn infer_type_args( - &self, - generic_func: &AirFunction, - args: &[Operand], - caller: &AirFunction, - ) -> Option> { - let mut resolved: HashMap = HashMap::new(); - - for (param, arg) in generic_func.params.iter().zip(args.iter()) { - let arg_ty = self.operand_type(arg, caller); - self.unify_param(¶m.ty, &arg_ty, &mut resolved); - } - - let mut type_args = Vec::with_capacity(generic_func.type_params.len()); - for tp in &generic_func.type_params { - type_args.push(resolved.get(&tp.0)?.clone()); - } - Some(type_args) - } - - fn unify_param( - &self, - param_ty: &AirType, - arg_ty: &AirType, - resolved: &mut HashMap, - ) { - match param_ty { - AirType::Param(id) => { - resolved.entry(id.0).or_insert_with(|| arg_ty.clone()); - } - AirType::Ptr(inner) => { - if let AirType::Ptr(arg_inner) = arg_ty { - self.unify_param(inner, arg_inner, resolved); - } - } - AirType::Array(inner, _) => { - if let AirType::Array(arg_inner, _) = arg_ty { - self.unify_param(inner, arg_inner, resolved); - } - } - AirType::Slice(inner) => { - if let AirType::Slice(arg_inner) = arg_ty { - self.unify_param(inner, arg_inner, resolved); - } - } - AirType::FnPtr { params, ret, .. } => { - if let AirType::FnPtr { - params: arg_params, - ret: arg_ret, - .. 
- } = arg_ty - { - for (p, a) in params.iter().zip(arg_params.iter()) { - self.unify_param(p, a, resolved); - } - self.unify_param(ret, arg_ret, resolved); - } - } - _ => {} - } - } - - fn operand_type(&self, operand: &Operand, caller: &AirFunction) -> AirType { - match operand { - Operand::Const(c) => match c { - AirConst::IntLiteral(_) => AirType::I64, - AirConst::Int(_, size) => match size { - AirIntSize::I8 => AirType::I8, - AirIntSize::I16 => AirType::I16, - AirIntSize::I32 => AirType::I32, - AirIntSize::I64 => AirType::I64, - AirIntSize::U8 => AirType::U8, - AirIntSize::U16 => AirType::U16, - AirIntSize::U32 => AirType::U32, - AirIntSize::U64 => AirType::U64, - }, - AirConst::Float(_, size) => match size { - AirFloatSize::F32 => AirType::F32, - AirFloatSize::F64 => AirType::F64, - }, - AirConst::Bool(_) => AirType::Bool, - AirConst::Str(_) => AirType::Str, - AirConst::Null => AirType::Void, - AirConst::ZeroInit(ty) | AirConst::Undef(ty) => ty.clone(), - }, - Operand::Copy(id) | Operand::Move(id) => caller - .params - .iter() - .find(|p| p.id == *id) - .map(|p| p.ty.clone()) - .or_else(|| { - caller - .locals - .iter() - .find(|l| l.id == *id) - .map(|l| l.ty.clone()) - }) - .unwrap_or(AirType::I64), - } - } - - fn instantiate(&mut self, program: &mut AirProgram) { - let mut new_functions = Vec::new(); - let mut mono_instances = Vec::new(); - - for request in &self.requests { - let key = ( - request.function_name.clone(), - self.type_args_key(&request.type_args), - ); - - if self.instantiated.contains_key(&key) { - continue; - } - - let func_idx = self.generic_functions[&request.function_name]; - let original_func = &program.functions[func_idx]; - let original_id = original_func.id; - let new_id = FunctionId(self.next_function_id); - self.next_function_id += 1; - - let mangled_name = self.mangle_name(&request.function_name, &request.type_args); - let saved_type_params = original_func.type_params.clone(); - let mut new_func = original_func.clone(); - 
new_func.id = new_id; - new_func.name = mangled_name.clone(); - new_func.type_params = Vec::new(); - - self.substitute_types_in_function( - &mut new_func, - &saved_type_params, - &request.type_args, - ); - - new_functions.push(new_func); - self.instantiated.insert(key, mangled_name); - - mono_instances.push(MonoInstance { - original: original_id, - type_args: request.type_args.clone(), - result: new_id, - }); - } - - program.functions.extend(new_functions); - program.mono_instances.extend(mono_instances); - } - - fn rewrite_call_sites(&self, program: &mut AirProgram) { - let name_to_mangled: MangledMap = { - let mut map: HashMap<&str, Vec<_>> = HashMap::new(); - for (key, mangled) in &self.instantiated { - map.entry(key.0.as_str()).or_default().push((key, mangled)); - } - map - }; - - if name_to_mangled.is_empty() { - return; - } - - for func in &mut program.functions { - if !func.type_params.is_empty() { - continue; - } - for block in &mut func.blocks { - for stmt in &mut block.stmts { - self.rewrite_stmt(stmt, &name_to_mangled); - } - self.rewrite_terminator(&mut block.terminator, &name_to_mangled); - } - } - } - - fn rewrite_stmt(&self, stmt: &mut AirStmt, name_map: &MangledMap) { - match &mut stmt.kind { - AirStmtKind::Assign { - rvalue: Rvalue::Call { func: callee, .. }, - .. - } => { - self.rewrite_callee(callee, name_map); - } - AirStmtKind::CallVoid { func: callee, .. } => { - self.rewrite_callee(callee, name_map); - } - _ => {} - } - } - - fn rewrite_terminator(&self, term: &mut AirTerminator, name_map: &MangledMap) { - if let AirTerminator::Invoke { func: callee, .. 
} = term { - self.rewrite_callee(callee, name_map); - } - } - - fn rewrite_callee(&self, callee: &mut Callee, name_map: &MangledMap) { - if let Callee::Named(name) = callee - && let Some(entries) = name_map.get(name.as_str()) - && let Some((_, mangled)) = entries.first() - { - *name = (*mangled).clone(); - } - } - - fn type_args_key(&self, types: &[AirType]) -> Vec { - types.iter().map(type_to_string).collect() - } - - fn mangle_name(&self, name: &str, type_args: &[AirType]) -> String { - if type_args.is_empty() { - return name.to_string(); - } - let type_str = type_args - .iter() - .map(type_to_string) - .collect::>() - .join("_"); - format!("__mono_{}_{}", name, type_str) - } - - fn substitute_types_in_function( - &self, - func: &mut AirFunction, - type_params: &[TypeParamId], - type_args: &[AirType], - ) { - for param in &mut func.params { - substitute_type(&mut param.ty, type_params, type_args); - } - substitute_type(&mut func.ret_ty, type_params, type_args); - - for local in &mut func.locals { - substitute_type(&mut local.ty, type_params, type_args); - } - - for block in &mut func.blocks { - for stmt in &mut block.stmts { - substitute_stmt(stmt, type_params, type_args); - } - substitute_terminator(&mut block.terminator, type_params, type_args); - } - } -} - -fn type_to_string(ty: &AirType) -> String { - match ty { - AirType::I8 => "i8".to_string(), - AirType::I16 => "i16".to_string(), - AirType::I32 => "i32".to_string(), - AirType::I64 => "i64".to_string(), - AirType::U8 => "u8".to_string(), - AirType::U16 => "u16".to_string(), - AirType::U32 => "u32".to_string(), - AirType::U64 => "u64".to_string(), - AirType::F32 => "f32".to_string(), - AirType::F64 => "f64".to_string(), - AirType::Bool => "bool".to_string(), - AirType::Str => "str".to_string(), - AirType::Ptr(inner) => format!("ptr_{}", type_to_string(inner)), - AirType::Struct(name) => name.clone(), - AirType::Array(inner, size) => format!("array_{}_{}", type_to_string(inner), size), - 
AirType::Slice(inner) => format!("slice_{}", type_to_string(inner)), - AirType::FnPtr { .. } => "fnptr".to_string(), - AirType::Param(id) => format!("param_{}", id.0), - AirType::Void => "void".to_string(), - } -} - -fn substitute_type(ty: &mut AirType, type_params: &[TypeParamId], type_args: &[AirType]) { - match ty { - AirType::Param(id) => { - if let Some(idx) = type_params.iter().position(|p| p == id) - && let Some(replacement) = type_args.get(idx) - { - *ty = replacement.clone(); - } - } - AirType::Ptr(inner) => substitute_type(inner, type_params, type_args), - AirType::Array(inner, _) => substitute_type(inner, type_params, type_args), - AirType::Slice(inner) => substitute_type(inner, type_params, type_args), - AirType::FnPtr { params, ret, .. } => { - for p in params { - substitute_type(p, type_params, type_args); - } - substitute_type(ret, type_params, type_args); - } - _ => {} - } -} - -fn substitute_stmt(stmt: &mut AirStmt, type_params: &[TypeParamId], type_args: &[AirType]) { - match &mut stmt.kind { - AirStmtKind::Assign { rvalue, .. } => { - substitute_rvalue(rvalue, type_params, type_args); - } - AirStmtKind::GcAlloc { ty, .. } | AirStmtKind::Alloc { ty, .. } => { - substitute_type(ty, type_params, type_args); - } - _ => {} - } -} - -fn substitute_rvalue(rvalue: &mut Rvalue, type_params: &[TypeParamId], type_args: &[AirType]) { - match rvalue { - Rvalue::Cast { from, to, .. } => { - substitute_type(from, type_params, type_args); - substitute_type(to, type_params, type_args); - } - Rvalue::StructInit { name, .. } => { - for tp in type_params { - if let Some(replacement) = type_args.get(tp.0 as usize) { - let mangled_suffix = type_to_string(replacement); - if name.contains("__mono_") { - continue; - } - *name = format!("__mono_{}_{}", name, mangled_suffix); - } - } - } - _ => {} - } -} - -fn substitute_terminator( - term: &mut AirTerminator, - type_params: &[TypeParamId], - type_args: &[AirType], -) { - if let AirTerminator::Invoke { func: callee, .. 
} = term { - substitute_callee(callee, type_params, type_args); - } -} - -fn substitute_callee(_callee: &mut Callee, _type_params: &[TypeParamId], _type_args: &[AirType]) { - // Callee rewriting happens in the separate rewrite_call_sites pass - // after all instances are known. No per-function substitution needed. -} diff --git a/air/src/mono/mod.rs b/air/src/mono/mod.rs new file mode 100644 index 0000000..5c2c28e --- /dev/null +++ b/air/src/mono/mod.rs @@ -0,0 +1,1095 @@ +mod rewrite; +pub(crate) mod substitute; + +use crate::*; +use std::collections::{HashMap, HashSet}; +use substitute::operand_type_from; + +pub fn monomorphize(mut program: AirProgram) -> Result> { + let mut ctx = MonoContext::new(&program); + ctx.collect_mono_requests(&program); + ctx.instantiate(&mut program); + ctx.rewrite_call_sites(&mut program); + program.functions.retain(|f| f.type_params.is_empty()); + + // Monomorphize generic enums + let errors = monomorphize_enums(&mut program); + if !errors.is_empty() { + return Err(errors); + } + + Ok(program) +} + +/// Monomorphize generic enum definitions. +/// +/// Scans all functions for `EnumInit`, `EnumTag`, and `EnumPayload` that reference +/// generic enums. Creates monomorphized copies of the enum definitions with concrete +/// types substituted in, and rewrites the enum_name references. +/// +/// The algorithm has three phases: +/// +/// 1. **Collection**: Scan all `EnumInit` sites with non-empty payload to infer `(enum_name, type_args)` pairs. Unit variants cannot contribute type args here. +/// +/// 2. **Local resolution**: Build a per-local mapping `LocalId -> mangled_name` by: +/// - Looking at non-unit `EnumInit` assignments to each local. +/// - Propagating from the function return type (for return statements). +/// - Propagating from already-resolved locals (for `Rvalue::Use(Copy(id))`). +/// - Falling back to a unique match when only one monomorphization exists. +/// +/// 3. 
**Rewriting**: Use the local mapping to rewrite unit variant `EnumInit`, +/// `EnumTag`, `EnumPayload`, and local types. +fn monomorphize_enums(program: &mut AirProgram) -> Vec { + let mut errors: Vec = Vec::new(); + // Collect generic enum indices + let generic_enums: HashMap = program + .enums + .iter() + .enumerate() + .filter(|(_, e)| !e.type_params.is_empty()) + .map(|(i, e)| (e.name.clone(), i)) + .collect(); + + if generic_enums.is_empty() { + return errors; + } + + // Phase 1: Collect all (enum_name, type_args) pairs from EnumInit sites + let mut enum_mono_requests: HashMap<(String, Vec), Vec> = HashMap::new(); + + for func in &program.functions { + for block in &func.blocks { + for stmt in &block.stmts { + if let AirStmtKind::Assign { rvalue, .. } = &stmt.kind { + collect_enum_mono_from_rvalue( + rvalue, + &generic_enums, + &program.enums, + func, + &mut enum_mono_requests, + ); + } + } + } + } + + // Also collect mono requests from pre-mangled local/param/return types. + // When sema preserves type args (e.g., Option), the lowering pass pre-computes + // the mangled name. We need to ensure the corresponding enum definition exists. 
+ for func in &program.functions { + for local in &func.locals { + collect_premangled_enum_requests_from_type( + &local.ty, + &generic_enums, + &program.enums, + &mut enum_mono_requests, + ); + } + for param in &func.params { + collect_premangled_enum_requests_from_type( + ¶m.ty, + &generic_enums, + &program.enums, + &mut enum_mono_requests, + ); + } + collect_premangled_enum_requests_from_type( + &func.ret_ty, + &generic_enums, + &program.enums, + &mut enum_mono_requests, + ); + } + + if enum_mono_requests.is_empty() { + return errors; + } + + // Create monomorphized enum definitions + let mut mono_enum_names: HashMap<(String, Vec), String> = HashMap::new(); + + for (key, type_args) in &enum_mono_requests { + let (enum_name, _) = key; + let enum_idx = generic_enums[enum_name]; + let original = &program.enums[enum_idx]; + + let type_str = type_args + .iter() + .map(substitute::type_to_string) + .collect::>() + .join("$"); + let mangled_name = format!("__mono_{}_{}", enum_name, type_str); + + // Substitute type params in variant payload types + let mono_variants: Vec = original + .variants + .iter() + .map(|v| { + let mono_payload: Vec = v + .payload + .iter() + .map(|ty| substitute_enum_type(ty, &original.type_params, type_args)) + .collect(); + AirEnumVariant { + name: v.name.clone(), + tag: v.tag, + payload: mono_payload, + } + }) + .collect(); + + let mono_def = AirEnumDef { + name: mangled_name.clone(), + type_params: Vec::new(), + variants: mono_variants, + span: original.span, + }; + + program.enums.push(mono_def); + mono_enum_names.insert(key.clone(), mangled_name); + } + + // Phase 2: Build per-local mono resolution map for each function + // Phase 3: Rewrite enum_name references in all functions + for func in &mut program.functions { + let local_mono_map = resolve_local_enum_monos( + func, + &generic_enums, + &program.enums, + &mono_enum_names, + ); + + for block in &mut func.blocks { + for stmt in &mut block.stmts { + rewrite_enum_refs_in_stmt( + stmt, + 
&generic_enums, + &program.enums, + &mono_enum_names, + &func.params.clone(), + &func.locals.clone(), + &local_mono_map, + &mut errors, + ); + } + } + // Rewrite local types that reference generic enums + for local in &mut func.locals { + if let AirType::Enum(ref name) = local.ty { + if generic_enums.contains_key(name) { + // First try the resolved map from assignments + if let Some(mangled) = local_mono_map.get(&local.id) { + local.ty = AirType::Enum(mangled.clone()); + } else { + // Fall back to unique match + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == name) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + local.ty = AirType::Enum(mono_names[0].clone()); + } + } + } + } + } + // Also rewrite param types + for param in &mut func.params { + if let AirType::Enum(ref name) = param.ty { + if generic_enums.contains_key(name) { + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == name) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + param.ty = AirType::Enum(mono_names[0].clone()); + } + } + } + } + // Rewrite function return type + if let AirType::Enum(ref name) = func.ret_ty { + if generic_enums.contains_key(name) { + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == name) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + func.ret_ty = AirType::Enum(mono_names[0].clone()); + } + } + } + } + + // Remove generic enum definitions (they've been replaced by mono'd versions) + program.enums.retain(|e| e.type_params.is_empty()); + + errors +} + +/// Build a per-local mapping from `LocalId` to monomorphized enum name. +/// +/// For each local that holds a generic enum type, try to determine which specific +/// monomorphization it should use by examining: +/// 1. Non-unit `EnumInit` assignments to the local (payload types give us type args) +/// 2. 
`Rvalue::Use(Copy(other_local))` assignments (propagate from already-resolved locals) +/// 3. Function return type context (for locals used in return statements) +/// 4. Unique-match fallback (when only one monomorphization exists) +fn resolve_local_enum_monos( + func: &AirFunction, + generic_enums: &HashMap, + enum_defs: &[AirEnumDef], + mono_enum_names: &HashMap<(String, Vec), String>, +) -> HashMap { + let mut local_mono: HashMap = HashMap::new(); + + // Pass 0: If a local's or param's type was pre-mangled by the lowering pass + // (sema had concrete type args in the annotation), use that directly. + for local in &func.locals { + if let AirType::Enum(ref name) = local.ty { + if name.starts_with("__mono_") { + local_mono.insert(local.id, name.clone()); + } + } + } + for param in &func.params { + if let AirType::Enum(ref name) = param.ty { + if name.starts_with("__mono_") { + local_mono.insert(param.id, name.clone()); + } + } + } + + // Pass 1: Resolve locals that have non-unit EnumInit assignments + for block in &func.blocks { + for stmt in &block.stmts { + if let AirStmtKind::Assign { + place: Place::Local(local_id), + rvalue, + } = &stmt.kind + { + if let Rvalue::EnumInit { + enum_name, + payload, + .. 
+ } = rvalue + { + if payload.is_empty() { + continue; // unit variant, skip for now + } + if let Some(&enum_idx) = generic_enums.get(enum_name.as_str()) { + let enum_def = &enum_defs[enum_idx]; + if let Some(type_args) = + infer_enum_type_args(enum_def, rvalue, func) + { + let key_strs: Vec = + type_args.iter().map(substitute::type_to_string).collect(); + let key = (enum_name.clone(), key_strs); + if let Some(mangled) = mono_enum_names.get(&key) { + local_mono.insert(*local_id, mangled.clone()); + } + } + } + } + } + } + } + + // Pass 2: Propagate through Use(Copy(other)) assignments and return type context + // Also check function return type for locals that appear in return terminators + let ret_ty_mono = if let AirType::Enum(ref name) = func.ret_ty { + if name.starts_with("__mono_") { + // Already monomorphized (e.g., from function monomorphization) + Some(name.clone()) + } else if generic_enums.contains_key(name) { + // Try unique match for the return type + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == name) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + Some(mono_names[0].clone()) + } else { + None + } + } else { + None + } + } else { + None + }; + + // Propagate from return terminators: if a local is returned and the function + // return type is a known mono enum, that local should use the same mono name. 
+ if let Some(ref ret_mono) = ret_ty_mono { + for block in &func.blocks { + if let AirTerminator::Return(Some(Operand::Copy(local_id))) = &block.terminator { + let local_ty = func.locals.iter().find(|l| l.id == *local_id).map(|l| &l.ty); + if let Some(AirType::Enum(name)) = local_ty { + if generic_enums.contains_key(name) { + local_mono.entry(*local_id).or_insert_with(|| ret_mono.clone()); + } + } + } + } + } + + // Propagate from Use(Copy(source)) assignments + let mut changed = true; + while changed { + changed = false; + for block in &func.blocks { + for stmt in &block.stmts { + if let AirStmtKind::Assign { + place: Place::Local(target_id), + rvalue: Rvalue::Use(Operand::Copy(source_id)), + } = &stmt.kind + { + if !local_mono.contains_key(target_id) { + if let Some(mangled) = local_mono.get(source_id).cloned() { + local_mono.insert(*target_id, mangled); + changed = true; + } + } + } + } + } + } + + // Pass 3: For remaining unresolved locals with generic enum types, + // try unique-match fallback + for local in &func.locals { + if local_mono.contains_key(&local.id) { + continue; + } + if let AirType::Enum(ref name) = local.ty { + if generic_enums.contains_key(name) { + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == name) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + local_mono.insert(local.id, mono_names[0].clone()); + } + } + } + } + + local_mono +} + +fn substitute_enum_type( + ty: &AirType, + type_params: &[TypeParamId], + type_args: &[AirType], +) -> AirType { + match ty { + AirType::Param(id) => { + if let Some(idx) = type_params.iter().position(|p| p == id) { + if let Some(replacement) = type_args.get(idx) { + return replacement.clone(); + } + } + ty.clone() + } + AirType::Ptr(inner) => AirType::Ptr(Box::new(substitute_enum_type( + inner, + type_params, + type_args, + ))), + AirType::Array(inner, n) => AirType::Array( + Box::new(substitute_enum_type(inner, type_params, type_args)), + *n, + ), + 
AirType::Slice(inner) => AirType::Slice(Box::new(substitute_enum_type( + inner, + type_params, + type_args, + ))), + AirType::FnPtr { params, ret, conv } => AirType::FnPtr { + params: params + .iter() + .map(|p| substitute_enum_type(p, type_params, type_args)) + .collect(), + ret: Box::new(substitute_enum_type(ret, type_params, type_args)), + conv: *conv, + }, + AirType::Enum(name) => { + // The enum name may contain pre-mangled param references (e.g. + // "__mono_Option_param_0") when a generic enum definition has a + // variant whose payload is another generic enum parameterized by a + // type param. Replace each "param_N" segment with the concrete + // type arg so the name resolves to the correct monomorphized def. + let mut new_name = name.clone(); + for (i, param) in type_params.iter().enumerate() { + if let Some(replacement) = type_args.get(i) { + let param_str = substitute::type_to_string(&AirType::Param(*param)); + let replacement_str = substitute::type_to_string(replacement); + if param_str != replacement_str { + new_name = new_name.replace(¶m_str, &replacement_str); + } + } + } + AirType::Enum(new_name) + } + other => other.clone(), + } +} + +fn collect_enum_mono_from_rvalue( + rvalue: &Rvalue, + generic_enums: &HashMap, + enum_defs: &[AirEnumDef], + func: &AirFunction, + requests: &mut HashMap<(String, Vec), Vec>, +) { + match rvalue { + Rvalue::EnumInit { + enum_name, payload, .. + } => { + if let Some(&enum_idx) = generic_enums.get(enum_name) { + let enum_def = &enum_defs[enum_idx]; + // Infer type args from payload operand types + if let Some(type_args) = infer_enum_type_args(enum_def, rvalue, func) { + let key_strs: Vec = + type_args.iter().map(substitute::type_to_string).collect(); + let key = (enum_name.clone(), key_strs); + requests.entry(key).or_insert(type_args); + } else if payload.is_empty() { + // Unit variant -- can't infer type args from payload. + // Type args will be inferred from other uses (e.g., from the local type). 
+ } + } + } + Rvalue::EnumTag { enum_name, .. } | Rvalue::EnumPayload { enum_name, .. } => { + // These will be handled by looking at the operand's type, + // which should be a local with AirType::Enum("Option") etc. + // The actual rewriting happens in the rewrite pass. + if generic_enums.contains_key(enum_name) { + // We'll handle these during rewriting + } + } + _ => {} + } +} + +fn collect_premangled_enum_requests_from_type( + ty: &AirType, + generic_enums: &HashMap, + enum_defs: &[AirEnumDef], + requests: &mut HashMap<(String, Vec), Vec>, +) { + match ty { + AirType::Enum(name) => { + if !name.starts_with("__mono_") { + return; + } + for (enum_name, &enum_idx) in generic_enums { + let prefix = format!("__mono_{}_", enum_name); + if let Some(type_suffix) = name.strip_prefix(&prefix) { + let enum_def = &enum_defs[enum_idx]; + if let Some(type_args) = resolve_type_args_from_suffix(type_suffix, enum_def) { + let key_strs: Vec = + type_args.iter().map(substitute::type_to_string).collect(); + let key = (enum_name.clone(), key_strs); + let inserted = + requests.entry(key).or_insert_with(|| type_args.clone()).clone(); + for nested in &inserted { + collect_premangled_enum_requests_from_type( + nested, + generic_enums, + enum_defs, + requests, + ); + } + } + break; + } + } + } + AirType::Ptr(inner) | AirType::Slice(inner) => { + collect_premangled_enum_requests_from_type(inner, generic_enums, enum_defs, requests); + } + AirType::Array(inner, _) => { + collect_premangled_enum_requests_from_type(inner, generic_enums, enum_defs, requests); + } + AirType::FnPtr { params, ret, .. } => { + for param in params { + collect_premangled_enum_requests_from_type(param, generic_enums, enum_defs, requests); + } + collect_premangled_enum_requests_from_type(ret, generic_enums, enum_defs, requests); + } + _ => {} + } +} + +fn infer_enum_type_args( + enum_def: &AirEnumDef, + rvalue: &Rvalue, + func: &AirFunction, +) -> Option> { + if let Rvalue::EnumInit { + variant, payload, .. 
+ } = rvalue + { + let variant_def = enum_def.variants.iter().find(|v| v.name == *variant)?; + + let mut resolved: HashMap = HashMap::new(); + + for (param_ty, operand) in variant_def.payload.iter().zip(payload.iter()) { + let arg_ty = operand_type_from(operand, &func.params, &func.locals); + unify_enum_param(param_ty, &arg_ty, &mut resolved); + } + + let mut type_args = Vec::with_capacity(enum_def.type_params.len()); + for tp in &enum_def.type_params { + type_args.push(resolved.get(&tp.0)?.clone()); + } + Some(type_args) + } else { + None + } +} + +fn unify_enum_param(param_ty: &AirType, arg_ty: &AirType, resolved: &mut HashMap) { + match param_ty { + AirType::Param(id) => { + resolved.entry(id.0).or_insert_with(|| arg_ty.clone()); + } + AirType::Ptr(inner) => { + if let AirType::Ptr(arg_inner) = arg_ty { + unify_enum_param(inner, arg_inner, resolved); + } + } + AirType::Array(inner, _) => { + if let AirType::Array(arg_inner, _) = arg_ty { + unify_enum_param(inner, arg_inner, resolved); + } + } + AirType::Slice(inner) => { + if let AirType::Slice(arg_inner) = arg_ty { + unify_enum_param(inner, arg_inner, resolved); + } + } + _ => {} + } +} + +fn rewrite_enum_refs_in_stmt( + stmt: &mut AirStmt, + generic_enums: &HashMap, + enum_defs: &[AirEnumDef], + mono_enum_names: &HashMap<(String, Vec), String>, + func_params: &[AirParam], + func_locals: &[AirLocal], + local_mono_map: &HashMap, + errors: &mut Vec, +) { + if let AirStmtKind::Assign { place, rvalue } = &mut stmt.kind { + match rvalue { + Rvalue::EnumInit { + enum_name, + variant, + payload, + .. 
+ } => { + if let Some(&enum_idx) = generic_enums.get(enum_name.as_str()) { + let enum_def = &enum_defs[enum_idx]; + let mut resolved_from_payload = false; + if !payload.is_empty() { + // Non-unit variant: infer type args from payload + let variant_def = enum_def.variants.iter().find(|v| v.name == *variant); + if let Some(vd) = variant_def { + let mut resolved: HashMap = HashMap::new(); + for (param_ty, operand) in vd.payload.iter().zip(payload.iter()) { + let arg_ty = + operand_type_from(operand, func_params, func_locals); + unify_enum_param(param_ty, &arg_ty, &mut resolved); + } + let type_args: Option> = enum_def + .type_params + .iter() + .map(|tp| resolved.get(&tp.0).cloned()) + .collect(); + if let Some(type_args) = type_args { + let key_strs: Vec = + type_args.iter().map(substitute::type_to_string).collect(); + let key = (enum_name.clone(), key_strs); + if let Some(mangled) = mono_enum_names.get(&key) { + *enum_name = mangled.clone(); + resolved_from_payload = true; + } + } + } + } + if !resolved_from_payload { + // Unit variant: use the target local's resolved mono name + if let Place::Local(target_id) = place { + if let Some(mangled) = local_mono_map.get(target_id) { + *enum_name = mangled.clone(); + } else { + // Fallback: unique match + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == enum_name.as_str()) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + *enum_name = mono_names[0].clone(); + } else if mono_names.len() > 1 { + errors.push(format!( + "ambiguous unit variant {}::{} with {} \ + monomorphizations; cannot determine which to use \ + (type annotation info lost during sema)", + enum_name, variant, mono_names.len() + )); + } + } + } + } + } + } + Rvalue::EnumTag { + enum_name, + operand, + .. 
+ } => { + if generic_enums.contains_key(enum_name.as_str()) { + // Resolve from the operand's local + if let Some(mangled) = resolve_operand_mono(operand, local_mono_map) { + *enum_name = mangled; + } else { + // Fallback: unique match + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == enum_name.as_str()) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + *enum_name = mono_names[0].clone(); + } + } + } + } + Rvalue::EnumPayload { + enum_name, + operand, + .. + } => { + if generic_enums.contains_key(enum_name.as_str()) { + // Resolve from the operand's local + if let Some(mangled) = resolve_operand_mono(operand, local_mono_map) { + *enum_name = mangled; + } else { + // Fallback: unique match + let mono_names: Vec<_> = mono_enum_names + .iter() + .filter(|((en, _), _)| en == enum_name.as_str()) + .map(|(_, mn)| mn.clone()) + .collect(); + if mono_names.len() == 1 { + *enum_name = mono_names[0].clone(); + } + } + } + } + _ => {} + } + } +} + +/// Resolve the monomorphized enum name from an operand by looking up the local in +/// the local_mono_map. +fn resolve_operand_mono( + operand: &Operand, + local_mono_map: &HashMap, +) -> Option { + match operand { + Operand::Copy(id) | Operand::Move(id) => local_mono_map.get(id).cloned(), + Operand::Const(_) => None, + } +} + +/// Resolve AirType values from suffix strings produced by `type_to_string`. +/// Each string in `key_strs` is a single type arg (split by `$` separator). +/// Handles primitives, enum types (prefixed with "enum_"), and struct types. 
+fn resolve_type_args_from_suffix(type_suffix: &str, enum_def: &AirEnumDef) -> Option> { + let arity = enum_def.type_params.len(); + if arity == 0 { + return Some(Vec::new()); + } + let segments: Vec<&str> = type_suffix.split('$').collect(); + resolve_type_args_from_segments(&segments, arity) +} + +fn resolve_type_args_from_segments(segments: &[&str], remaining: usize) -> Option> { + if remaining == 0 { + return segments.is_empty().then_some(Vec::new()); + } + if segments.len() < remaining { + return None; + } + + let max_take = segments.len() - remaining + 1; + for take in (1..=max_take).rev() { + let candidate = segments[..take].join("$"); + if let Some(ty) = resolve_single_type_arg(&candidate) + && let Some(mut rest) = + resolve_type_args_from_segments(&segments[take..], remaining - 1) + { + let mut type_args = vec![ty]; + type_args.append(&mut rest); + return Some(type_args); + } + } + + None +} + +fn resolve_type_list_from_segments(segments: &[&str]) -> Option> { + if segments.is_empty() { + return Some(Vec::new()); + } + + for take in (1..=segments.len()).rev() { + let candidate = segments[..take].join("$"); + if let Some(ty) = resolve_single_type_arg(&candidate) + && let Some(mut rest) = resolve_type_list_from_segments(&segments[take..]) + { + let mut items = vec![ty]; + items.append(&mut rest); + return Some(items); + } + } + + None +} + +fn resolve_single_type_arg(s: &str) -> Option { + let ty = match s { + "i8" => AirType::I8, + "i16" => AirType::I16, + "i32" => AirType::I32, + "i64" => AirType::I64, + "u8" => AirType::U8, + "u16" => AirType::U16, + "u32" => AirType::U32, + "u64" => AirType::U64, + "f32" => AirType::F32, + "f64" => AirType::F64, + "bool" => AirType::Bool, + "str" => AirType::Str, + "opaque" => AirType::Opaque, + "void" => AirType::Void, + other => { + if let Some(rest) = other.strip_prefix("ptr_") { + AirType::Ptr(Box::new(resolve_single_type_arg(rest)?)) + } else if let Some(rest) = other.strip_prefix("slice_") { + 
AirType::Slice(Box::new(resolve_single_type_arg(rest)?)) + } else if let Some(rest) = other.strip_prefix("array_") { + let split = rest.rfind('_')?; + let inner = &rest[..split]; + let n = rest[split + 1..].parse().ok()?; + AirType::Array(Box::new(resolve_single_type_arg(inner)?), n) + } else if let Some((rest, conv)) = other + .strip_prefix("fnptrRust$") + .map(|rest| (rest, CallingConv::Rust)) + .or_else(|| other.strip_prefix("fnptrC$").map(|rest| (rest, CallingConv::C))) + .or_else(|| { + other + .strip_prefix("fnptr$") + .map(|rest| (rest, CallingConv::Aelys)) + }) + { + let (params, ret) = if let Some(ret) = rest.strip_prefix("$R") { + (Vec::new(), resolve_single_type_arg(ret)?) + } else { + let segments: Vec<&str> = rest.split('$').collect(); + let mut parsed: Option<(Vec, AirType)> = None; + for ret_start in 1..segments.len() { + let ret_head = match segments[ret_start].strip_prefix('R') { + Some(head) => head, + None => continue, + }; + let mut ret_segments = Vec::with_capacity(segments.len() - ret_start); + ret_segments.push(ret_head); + ret_segments.extend_from_slice(&segments[ret_start + 1..]); + let Some(params) = resolve_type_list_from_segments(&segments[..ret_start]) + else { + continue; + }; + let Some(ret) = resolve_single_type_arg(&ret_segments.join("$")) else { + continue; + }; + parsed = Some((params, ret)); + break; + } + parsed? 
+ }; + AirType::FnPtr { + params, + ret: Box::new(ret), + conv, + } + } else if let Some(rest) = other.strip_prefix("enum_") { + AirType::Enum(rest.to_string()) + } else if let Some(rest) = other.strip_prefix("param_") { + AirType::Param(TypeParamId(rest.parse().ok()?)) + } else { + if other.contains('$') && !other.starts_with("__mono_") { + return None; + } + AirType::Struct(other.to_string()) + } + } + }; + Some(ty) +} + +pub(super) struct MonoContext { + pub(super) generic_functions: HashMap, + pub(super) requests: Vec, + pub(super) instantiated: HashMap<(String, Vec), String>, + pub(super) next_function_id: u32, +} + +pub(super) struct MonoRequest { + pub(super) function_name: String, + pub(super) type_args: Vec, +} + +impl MonoContext { + fn new(program: &AirProgram) -> Self { + let generic_functions: HashMap = program + .functions + .iter() + .enumerate() + .filter(|(_, f)| !f.type_params.is_empty()) + .map(|(i, f)| (f.name.clone(), i)) + .collect(); + + Self { + generic_functions, + requests: Vec::new(), + instantiated: HashMap::new(), + next_function_id: program.functions.len() as u32, + } + } + + fn collect_mono_requests(&mut self, program: &AirProgram) { + let generic_names: HashSet = self.generic_functions.keys().cloned().collect(); + + for func in &program.functions { + if func.type_params.is_empty() { + self.collect_from_function(func, program, &generic_names); + } + } + } + + fn collect_from_function( + &mut self, + func: &AirFunction, + program: &AirProgram, + generic_names: &HashSet, + ) { + for block in &func.blocks { + for stmt in &block.stmts { + self.collect_from_stmt(stmt, func, program, generic_names); + } + self.collect_from_terminator(&block.terminator, func, program, generic_names); + } + } + + fn collect_from_stmt( + &mut self, + stmt: &AirStmt, + caller: &AirFunction, + program: &AirProgram, + generic_names: &HashSet, + ) { + match &stmt.kind { + AirStmtKind::Assign { + rvalue: Rvalue::Call { func: callee, args }, + .. 
+ } => { + self.try_collect(callee, args, caller, program, generic_names); + } + AirStmtKind::CallVoid { func: callee, args } => { + self.try_collect(callee, args, caller, program, generic_names); + } + _ => {} + } + } + + fn collect_from_terminator( + &mut self, + term: &AirTerminator, + caller: &AirFunction, + program: &AirProgram, + generic_names: &HashSet, + ) { + if let AirTerminator::Invoke { + func: callee, args, .. + } = term + { + self.try_collect(callee, args, caller, program, generic_names); + } + } + + fn try_collect( + &mut self, + callee: &Callee, + args: &[Operand], + caller: &AirFunction, + program: &AirProgram, + generic_names: &HashSet, + ) { + let name = match callee { + Callee::Named(n) if generic_names.contains(n) => n, + _ => return, + }; + + let func_idx = self.generic_functions[name]; + let generic_func = &program.functions[func_idx]; + + if let Some(type_args) = self.infer_type_args(generic_func, args, caller) { + let key = (name.clone(), self.type_args_key(&type_args)); + if !self.instantiated.contains_key(&key) { + self.requests.push(MonoRequest { + function_name: name.clone(), + type_args, + }); + } + } + } + + fn infer_type_args( + &self, + generic_func: &AirFunction, + args: &[Operand], + caller: &AirFunction, + ) -> Option> { + let mut resolved: HashMap = HashMap::new(); + + for (param, arg) in generic_func.params.iter().zip(args.iter()) { + let arg_ty = operand_type_from(arg, &caller.params, &caller.locals); + self.unify_param(¶m.ty, &arg_ty, &mut resolved); + } + + let mut type_args = Vec::with_capacity(generic_func.type_params.len()); + for tp in &generic_func.type_params { + type_args.push(resolved.get(&tp.0)?.clone()); + } + Some(type_args) + } + + pub(super) fn unify_param( + &self, + param_ty: &AirType, + arg_ty: &AirType, + resolved: &mut HashMap, + ) { + match param_ty { + AirType::Param(id) => { + resolved.entry(id.0).or_insert_with(|| arg_ty.clone()); + } + AirType::Ptr(inner) => { + if let AirType::Ptr(arg_inner) = arg_ty 
{ + self.unify_param(inner, arg_inner, resolved); + } + } + AirType::Array(inner, _) => { + if let AirType::Array(arg_inner, _) = arg_ty { + self.unify_param(inner, arg_inner, resolved); + } + } + AirType::Slice(inner) => { + if let AirType::Slice(arg_inner) = arg_ty { + self.unify_param(inner, arg_inner, resolved); + } + } + AirType::FnPtr { params, ret, .. } => { + if let AirType::FnPtr { + params: arg_params, + ret: arg_ret, + .. + } = arg_ty + { + for (p, a) in params.iter().zip(arg_params.iter()) { + self.unify_param(p, a, resolved); + } + self.unify_param(ret, arg_ret, resolved); + } + } + _ => {} + } + } + + pub(super) fn type_args_key(&self, types: &[AirType]) -> Vec { + types.iter().map(substitute::type_to_string).collect() + } + + pub(super) fn mangle_name(&self, name: &str, type_args: &[AirType]) -> String { + if type_args.is_empty() { + return name.to_string(); + } + let type_str = type_args + .iter() + .map(substitute::type_to_string) + .collect::>() + .join("$"); + format!("__mono_{}_{}", name, type_str) + } + + fn instantiate(&mut self, program: &mut AirProgram) { + let mut new_functions = Vec::new(); + let mut mono_instances = Vec::new(); + + for request in &self.requests { + let key = ( + request.function_name.clone(), + self.type_args_key(&request.type_args), + ); + + if self.instantiated.contains_key(&key) { + continue; + } + + let func_idx = self.generic_functions[&request.function_name]; + let original_func = &program.functions[func_idx]; + let original_id = original_func.id; + let new_id = FunctionId(self.next_function_id); + self.next_function_id += 1; + + let mangled_name = self.mangle_name(&request.function_name, &request.type_args); + let saved_type_params = original_func.type_params.clone(); + let mut new_func = original_func.clone(); + new_func.id = new_id; + new_func.name = mangled_name.clone(); + new_func.type_params = Vec::new(); + + substitute::substitute_types_in_function( + &mut new_func, + &saved_type_params, + &request.type_args, 
+ ); + + new_functions.push(new_func); + self.instantiated.insert(key, mangled_name); + + mono_instances.push(MonoInstance { + original: original_id, + type_args: request.type_args.clone(), + result: new_id, + }); + } + + program.functions.extend(new_functions); + program.mono_instances.extend(mono_instances); + } +} diff --git a/air/src/mono/rewrite.rs b/air/src/mono/rewrite.rs new file mode 100644 index 0000000..02b0167 --- /dev/null +++ b/air/src/mono/rewrite.rs @@ -0,0 +1,214 @@ +use super::MonoContext; +use super::substitute::operand_type_from; +use crate::*; +use std::collections::{HashMap, HashSet}; + +impl MonoContext { + pub(super) fn rewrite_call_sites(&self, program: &mut AirProgram) { + if self.instantiated.is_empty() { + return; + } + + // Collect return types of monomorphized functions so we can patch + // caller locals that store generic call results (before, their type was + // a placeholder i64 from Dynamic, but needs to become the real type) + let mono_ret_types: HashMap = self + .instantiated + .values() + .filter_map(|mangled_name| { + program + .functions + .iter() + .find(|f| f.name == *mangled_name) + .map(|f| (mangled_name.clone(), f.ret_ty.clone())) + }) + .collect(); + + // pre-collect generic function signatures for type inference during rewriting + let generic_sigs: HashMap, Vec)> = self + .generic_functions + .iter() + .map(|(name, &idx)| { + let f = &program.functions[idx]; + (name.clone(), (f.params.clone(), f.type_params.clone())) + }) + .collect(); + let generic_names: HashSet<&str> = generic_sigs.keys().map(|s| s.as_str()).collect(); + + for func in &mut program.functions { + if !func.type_params.is_empty() { + continue; + } + let caller_params = func.params.clone(); + let caller_locals = func.locals.clone(); + let mut local_type_patches: Vec<(LocalId, AirType)> = Vec::new(); + + for block in &mut func.blocks { + for stmt in &mut block.stmts { + self.rewrite_stmt( + stmt, + &caller_params, + &caller_locals, + &generic_sigs, + 
&generic_names, + &mono_ret_types, + &mut local_type_patches, + ); + } + self.rewrite_terminator( + &mut block.terminator, + &caller_params, + &caller_locals, + &generic_sigs, + &generic_names, + &mono_ret_types, + &mut local_type_patches, + ); + } + + // apply collected type patches to the caller's locals + // this fixes the type mismatch between the placeholder i64 and + // the actual return type of the monomorphized callee + for (local_id, new_ty) in local_type_patches { + if let Some(local) = func.locals.iter_mut().find(|l| l.id == local_id) { + local.ty = new_ty; + } + } + } + } + + fn rewrite_stmt( + &self, + stmt: &mut AirStmt, + caller_params: &[AirParam], + caller_locals: &[AirLocal], + generic_sigs: &HashMap, Vec)>, + generic_names: &HashSet<&str>, + mono_ret_types: &HashMap, + local_type_patches: &mut Vec<(LocalId, AirType)>, + ) { + match &mut stmt.kind { + AirStmtKind::Assign { + place, + rvalue: Rvalue::Call { func: callee, args }, + } => { + self.rewrite_callee( + callee, + args, + caller_params, + caller_locals, + generic_sigs, + generic_names, + ); + // after rewriting, patch the destination local's type to match + // the monomorphized function's return type + if let Place::Local(local_id) = place { + if let Callee::Named(name) = callee { + if let Some(ret_ty) = mono_ret_types.get(name.as_str()) { + local_type_patches.push((*local_id, ret_ty.clone())); + } + } + } + } + AirStmtKind::CallVoid { func: callee, args } => { + self.rewrite_callee( + callee, + args, + caller_params, + caller_locals, + generic_sigs, + generic_names, + ); + } + _ => {} + } + } + + fn rewrite_terminator( + &self, + term: &mut AirTerminator, + caller_params: &[AirParam], + caller_locals: &[AirLocal], + generic_sigs: &HashMap, Vec)>, + generic_names: &HashSet<&str>, + mono_ret_types: &HashMap, + local_type_patches: &mut Vec<(LocalId, AirType)>, + ) { + if let AirTerminator::Invoke { + func: callee, + args, + ret, + .. 
+ } = term + { + self.rewrite_callee( + callee, + args, + caller_params, + caller_locals, + generic_sigs, + generic_names, + ); + // patch destination local for Invoke too + if let Place::Local(local_id) = ret { + if let Callee::Named(name) = callee { + if let Some(ret_ty) = mono_ret_types.get(name.as_str()) { + local_type_patches.push((*local_id, ret_ty.clone())); + } + } + } + } + } + + fn rewrite_callee( + &self, + callee: &mut Callee, + args: &[Operand], + caller_params: &[AirParam], + caller_locals: &[AirLocal], + generic_sigs: &HashMap, Vec)>, + generic_names: &HashSet<&str>, + ) { + if let Callee::Named(name) = callee + && generic_names.contains(name.as_str()) + { + if let Some((gen_params, gen_type_params)) = generic_sigs.get(name.as_str()) { + // re-infer type arguments from the call site's actual operand types + if let Some(type_args) = self.infer_type_args_from_sig( + gen_params, + gen_type_params, + args, + caller_params, + caller_locals, + ) { + let key = (name.clone(), self.type_args_key(&type_args)); + if let Some(mangled) = self.instantiated.get(&key) { + *name = mangled.clone(); + } + } + } + } + } + + fn infer_type_args_from_sig( + &self, + generic_params: &[AirParam], + type_params: &[TypeParamId], + args: &[Operand], + caller_params: &[AirParam], + caller_locals: &[AirLocal], + ) -> Option> { + let mut resolved: HashMap = HashMap::new(); + + for (param, arg) in generic_params.iter().zip(args.iter()) { + let arg_ty = operand_type_from(arg, caller_params, caller_locals); + self.unify_param(¶m.ty, &arg_ty, &mut resolved); + } + + let mut type_args = Vec::with_capacity(type_params.len()); + for tp in type_params { + type_args.push(resolved.get(&tp.0)?.clone()); + } + Some(type_args) + } +} diff --git a/air/src/mono/substitute.rs b/air/src/mono/substitute.rs new file mode 100644 index 0000000..c72d50f --- /dev/null +++ b/air/src/mono/substitute.rs @@ -0,0 +1,204 @@ +use crate::*; + +pub(crate) fn type_to_string(ty: &AirType) -> String { + match ty { 
+ AirType::I8 => "i8".to_string(), + AirType::I16 => "i16".to_string(), + AirType::I32 => "i32".to_string(), + AirType::I64 => "i64".to_string(), + AirType::U8 => "u8".to_string(), + AirType::U16 => "u16".to_string(), + AirType::U32 => "u32".to_string(), + AirType::U64 => "u64".to_string(), + AirType::F32 => "f32".to_string(), + AirType::F64 => "f64".to_string(), + AirType::Bool => "bool".to_string(), + AirType::Str => "str".to_string(), + AirType::Ptr(inner) => format!("ptr_{}", type_to_string(inner)), + AirType::Struct(name) => name.clone(), + AirType::Enum(name) => format!("enum_{}", name), + AirType::Array(inner, size) => format!("array_{}_{}", type_to_string(inner), size), + AirType::Slice(inner) => format!("slice_{}", type_to_string(inner)), + // was using _ as separator everywhere, so fn(i32, f64)->bool and + // fn(i32)->f64 with a bool from somewhere else both gave "fnptr_i32_f64_bool" + AirType::FnPtr { params, ret, conv } => { + let params_str = params + .iter() + .map(type_to_string) + .collect::>() + .join("$"); + // Calling convention is part of the fnptr type; omitting it aliases + // distinct ABI shapes onto the same monomorphized enum/function name. 
+ let prefix = match conv { + CallingConv::Aelys => "fnptr", + CallingConv::C => "fnptrC", + CallingConv::Rust => "fnptrRust", + }; + format!("{prefix}${}$R{}", params_str, type_to_string(ret)) + } + AirType::Param(id) => format!("param_{}", id.0), + AirType::Opaque => "opaque".to_string(), + AirType::Void => "void".to_string(), + } +} + +pub(super) fn substitute_types_in_function( + func: &mut AirFunction, + type_params: &[TypeParamId], + type_args: &[AirType], +) { + for param in &mut func.params { + substitute_type(&mut param.ty, type_params, type_args); + } + substitute_type(&mut func.ret_ty, type_params, type_args); + + for local in &mut func.locals { + substitute_type(&mut local.ty, type_params, type_args); + } + + for block in &mut func.blocks { + for stmt in &mut block.stmts { + substitute_stmt(stmt, type_params, type_args); + } + substitute_terminator(&mut block.terminator, type_params, type_args); + } +} + +fn substitute_type(ty: &mut AirType, type_params: &[TypeParamId], type_args: &[AirType]) { + match ty { + AirType::Param(id) => { + if let Some(idx) = type_params.iter().position(|p| p == id) + && let Some(replacement) = type_args.get(idx) + { + *ty = replacement.clone(); + } + } + AirType::Ptr(inner) => substitute_type(inner, type_params, type_args), + AirType::Array(inner, _) => substitute_type(inner, type_params, type_args), + AirType::Slice(inner) => substitute_type(inner, type_params, type_args), + AirType::FnPtr { params, ret, .. 
} => { + for p in params { + substitute_type(p, type_params, type_args); + } + substitute_type(ret, type_params, type_args); + } + AirType::Enum(name) if !name.contains("__mono_") && !type_args.is_empty() => { + let suffix = type_args + .iter() + .map(type_to_string) + .collect::>() + .join("$"); + *ty = AirType::Enum(format!("__mono_{}_{}", name, suffix)); + } + AirType::Struct(name) if !name.contains("__mono_") && !name.starts_with("__closure_env_") && !type_args.is_empty() => { + let suffix = type_args + .iter() + .map(type_to_string) + .collect::>() + .join("$"); + *ty = AirType::Struct(format!("__mono_{}_{}", name, suffix)); + } + _ => {} + } +} + +fn substitute_stmt(stmt: &mut AirStmt, type_params: &[TypeParamId], type_args: &[AirType]) { + match &mut stmt.kind { + AirStmtKind::Assign { rvalue, .. } => { + substitute_rvalue(rvalue, type_params, type_args); + } + AirStmtKind::GcAlloc { ty, .. } | AirStmtKind::Alloc { ty, .. } => { + substitute_type(ty, type_params, type_args); + } + _ => {} + } +} + +fn substitute_rvalue(rvalue: &mut Rvalue, type_params: &[TypeParamId], type_args: &[AirType]) { + match rvalue { + Rvalue::Cast { from, to, .. } => { + substitute_type(from, type_params, type_args); + substitute_type(to, type_params, type_args); + } + // old code looped over type_params and renamed one at a time + // second iteration saw __mono_ prefix and bailed, which meant that only first param got encoded + Rvalue::StructInit { name, .. } => { + if !name.contains("__mono_") && !type_args.is_empty() { + let suffix = type_args + .iter() + .map(type_to_string) + .collect::>() + .join("$"); + *name = format!("__mono_{}_{}", name, suffix); + } + } + Rvalue::EnumInit { enum_name, .. } + | Rvalue::EnumTag { enum_name, .. } + | Rvalue::EnumPayload { enum_name, .. 
} => { + if !enum_name.contains("__mono_") && !type_args.is_empty() { + let suffix = type_args + .iter() + .map(type_to_string) + .collect::>() + .join("$"); + *enum_name = format!("__mono_{}_{}", enum_name, suffix); + } + } + Rvalue::ClosureCreate { .. } => {} + _ => {} + } +} + +fn substitute_terminator( + _term: &mut AirTerminator, + _type_params: &[TypeParamId], + _type_args: &[AirType], +) { + // Callee rewriting happens in rewrite_call_sites (after all instances exist). + // Operand/place types come from locals, which are already substituted. +} + +pub(super) fn operand_type_from( + operand: &Operand, + params: &[AirParam], + locals: &[AirLocal], +) -> AirType { + match operand { + Operand::Const(c) => match c { + AirConst::IntLiteral(_) => AirType::I64, + AirConst::Int(_, size) => match size { + AirIntSize::I8 => AirType::I8, + AirIntSize::I16 => AirType::I16, + AirIntSize::I32 => AirType::I32, + AirIntSize::I64 => AirType::I64, + AirIntSize::U8 => AirType::U8, + AirIntSize::U16 => AirType::U16, + AirIntSize::U32 => AirType::U32, + AirIntSize::U64 => AirType::U64, + }, + AirConst::Float(_, size) => match size { + AirFloatSize::F32 => AirType::F32, + AirFloatSize::F64 => AirType::F64, + }, + AirConst::Bool(_) => AirType::Bool, + AirConst::Str(_) => AirType::Str, + AirConst::Null => AirType::Ptr(Box::new(AirType::Void)), + AirConst::FnRef(_) => AirType::Ptr(Box::new(AirType::Void)), + AirConst::Enum { enum_name, .. } => AirType::Enum(enum_name.clone()), + AirConst::ZeroInit(ty) | AirConst::Undef(ty) => ty.clone(), + AirConst::Array(_) => AirType::Opaque, // type not recoverable without element info + AirConst::Struct { name, .. 
} => AirType::Struct(name.clone()), + }, + Operand::Copy(id) | Operand::Move(id) => params + .iter() + .find(|p| p.id == *id) + .map(|p| p.ty.clone()) + .or_else(|| locals.iter().find(|l| l.id == *id).map(|l| l.ty.clone())) + .unwrap_or_else(|| { + panic!( + "mono: operand_type_from: local %{} not found in params or locals", + id.0 + ) + }), + } +} diff --git a/air/src/passes/copy_elim.rs b/air/src/passes/copy_elim.rs new file mode 100644 index 0000000..0a7f5b7 --- /dev/null +++ b/air/src/passes/copy_elim.rs @@ -0,0 +1,269 @@ +use crate::{ + AirFunction, AirProgram, AirStmt, AirStmtKind, AirTerminator, Callee, LocalId, Operand, Place, + Rvalue, +}; +use std::collections::{HashMap, HashSet}; + +pub fn eliminate_copies(program: &mut AirProgram) { + for function in &mut program.functions { + eliminate_function_copies(function); + } +} + +fn eliminate_function_copies(function: &mut AirFunction) { + let params: HashSet = function.params.iter().map(|p| p.id).collect(); + if params.is_empty() { + return; + } + + let writes = collect_write_counts(function); + let direct_aliases = collect_direct_aliases(function, &writes); + let replacements = resolve_to_params(&direct_aliases, ¶ms); + if replacements.is_empty() { + return; + } + + for block in &mut function.blocks { + block + .stmts + .retain(|stmt| !is_eliminated_copy_stmt(stmt, &replacements)); + for stmt in &mut block.stmts { + rewrite_stmt(stmt, &replacements); + } + rewrite_terminator(&mut block.terminator, &replacements); + } +} + +fn collect_write_counts(function: &AirFunction) -> HashMap { + let mut counts = HashMap::new(); + for block in &function.blocks { + for stmt in &block.stmts { + match &stmt.kind { + AirStmtKind::Assign { place, .. } => bump_place_write(place, &mut counts), + AirStmtKind::GcAlloc { local, .. } | AirStmtKind::Alloc { local, .. } => { + bump_local(*local, &mut counts) + } + _ => {} + } + } + + if let AirTerminator::Invoke { ret, .. 
} = &block.terminator { + bump_place_write(ret, &mut counts); + } + } + counts +} + +fn collect_direct_aliases( + function: &AirFunction, + writes: &HashMap, +) -> HashMap { + let mut aliases = HashMap::new(); + for block in &function.blocks { + for stmt in &block.stmts { + let (dst, src) = match copy_stmt_locals(stmt) { + Some(pair) => pair, + None => continue, + }; + if dst == src { + continue; + } + // Only safe to alias when dst is written exactly once (the copy itself) + // AND src is never written in the body (it's immutable). If src is + // modified later (e.g. a param reassigned in a loop), the alias would + // replace dst with a stale/wrong value. + if writes.get(&dst).copied().unwrap_or(0) == 1 + && writes.get(&src).copied().unwrap_or(0) == 0 + { + aliases.insert(dst, src); + } + } + } + aliases +} + +fn resolve_to_params( + direct_aliases: &HashMap, + params: &HashSet, +) -> HashMap { + let mut replacements = HashMap::new(); + for &dst in direct_aliases.keys() { + if let Some(param) = resolve_param_target(dst, direct_aliases, params) { + replacements.insert(dst, param); + } + } + replacements +} + +fn resolve_param_target( + dst: LocalId, + direct_aliases: &HashMap, + params: &HashSet, +) -> Option { + let mut seen = HashSet::new(); + let mut current = *direct_aliases.get(&dst)?; + while !params.contains(¤t) { + if !seen.insert(current) { + return None; + } + current = *direct_aliases.get(¤t)?; + } + Some(current) +} + +fn is_eliminated_copy_stmt(stmt: &AirStmt, replacements: &HashMap) -> bool { + let (dst, src) = match copy_stmt_locals(stmt) { + Some(pair) => pair, + None => return false, + }; + match replacements.get(&dst).copied() { + Some(target) => rewrite_local(src, replacements) == target, + None => false, + } +} + +fn copy_stmt_locals(stmt: &AirStmt) -> Option<(LocalId, LocalId)> { + match &stmt.kind { + AirStmtKind::Assign { + place: Place::Local(dst), + rvalue: Rvalue::Use(Operand::Copy(src) | Operand::Move(src)), + } => Some((*dst, *src)), + _ 
=> None, + } +} + +fn rewrite_stmt(stmt: &mut AirStmt, replacements: &HashMap) { + match &mut stmt.kind { + AirStmtKind::Assign { place, rvalue } => { + rewrite_place(place, replacements); + rewrite_rvalue(rvalue, replacements); + } + AirStmtKind::CallVoid { func, args } => { + rewrite_callee(func, replacements); + for arg in args { + rewrite_operand(arg, replacements); + } + } + AirStmtKind::GcDrop(local) | AirStmtKind::Free(local) => { + *local = rewrite_local(*local, replacements); + } + _ => {} + } +} + +fn rewrite_terminator(term: &mut AirTerminator, replacements: &HashMap) { + match term { + AirTerminator::Return(Some(op)) => rewrite_operand(op, replacements), + AirTerminator::Branch { cond, .. } => rewrite_operand(cond, replacements), + AirTerminator::Switch { discr, .. } => rewrite_operand(discr, replacements), + AirTerminator::Invoke { + func, args, ret, .. + } => { + rewrite_callee(func, replacements); + for arg in args { + rewrite_operand(arg, replacements); + } + rewrite_place(ret, replacements); + } + _ => {} + } +} + +fn rewrite_rvalue(value: &mut Rvalue, replacements: &HashMap) { + match value { + Rvalue::Use(op) | Rvalue::UnaryOp(_, op) | Rvalue::Deref(op) => { + rewrite_operand(op, replacements); + } + Rvalue::BinaryOp(_, left, right) => { + rewrite_operand(left, replacements); + rewrite_operand(right, replacements); + } + Rvalue::Call { func, args } => { + rewrite_callee(func, replacements); + for arg in args { + rewrite_operand(arg, replacements); + } + } + Rvalue::StructInit { fields, .. } => { + for (_, operand) in fields { + rewrite_operand(operand, replacements); + } + } + Rvalue::FieldAccess { base, .. } | Rvalue::Cast { operand: base, .. } => { + rewrite_operand(base, replacements); + } + Rvalue::Index { base, index } => { + rewrite_operand(base, replacements); + rewrite_operand(index, replacements); + } + Rvalue::AddressOf(local) => { + *local = rewrite_local(*local, replacements); + } + Rvalue::EnumInit { payload, .. 
} => { + for operand in payload { + rewrite_operand(operand, replacements); + } + } + Rvalue::EnumTag { operand, .. } => { + rewrite_operand(operand, replacements); + } + Rvalue::EnumPayload { operand, .. } => { + rewrite_operand(operand, replacements); + } + Rvalue::ClosureCreate { env, .. } => { + rewrite_operand(env, replacements); + } + } +} + +fn rewrite_callee(callee: &mut Callee, replacements: &HashMap) { + if let Callee::FnPtr(local) = callee { + *local = rewrite_local(*local, replacements); + } +} + +fn rewrite_operand(operand: &mut Operand, replacements: &HashMap) { + match operand { + Operand::Copy(local) | Operand::Move(local) => { + *local = rewrite_local(*local, replacements); + } + Operand::Const(_) => {} + } +} + +fn rewrite_place(place: &mut Place, replacements: &HashMap) { + match place { + Place::Local(local) | Place::Field(local, _) | Place::Deref(local) => { + *local = rewrite_local(*local, replacements); + } + Place::Index(local, index) => { + *local = rewrite_local(*local, replacements); + rewrite_operand(index, replacements); + } + } +} + +fn rewrite_local(local: LocalId, replacements: &HashMap) -> LocalId { + let mut current = local; + let mut seen = HashSet::new(); + while let Some(next) = replacements.get(¤t).copied() { + if !seen.insert(current) { + break; + } + current = next; + } + current +} + +fn bump_place_write(place: &Place, counts: &mut HashMap) { + match place { + Place::Local(local) | Place::Field(local, _) | Place::Index(local, _) => { + bump_local(*local, counts) + } + Place::Deref(_) => {} + } +} + +fn bump_local(local: LocalId, counts: &mut HashMap) { + *counts.entry(local).or_insert(0) += 1; +} diff --git a/air/src/passes/dead_locals.rs b/air/src/passes/dead_locals.rs new file mode 100644 index 0000000..0a0165e --- /dev/null +++ b/air/src/passes/dead_locals.rs @@ -0,0 +1,185 @@ +use crate::{ + AirFunction, AirProgram, AirStmtKind, AirTerminator, Callee, LocalId, Operand, Place, Rvalue, +}; +use std::collections::HashSet; + 
+pub fn eliminate_dead_locals(program: &mut AirProgram) { + for function in &mut program.functions { + eliminate_function_dead_locals(function); + } +} + +fn eliminate_function_dead_locals(function: &mut AirFunction) { + let mut referenced = HashSet::new(); + for block in &function.blocks { + for stmt in &block.stmts { + collect_stmt_locals(&stmt.kind, &mut referenced); + } + collect_terminator_locals(&block.terminator, &mut referenced); + } + + // remove dead locals from the locals list + function + .locals + .retain(|local| referenced.contains(&local.id)); + + // Transform or remove statements that assign to dead locals, dead store elimination + for block in &mut function.blocks { + for stmt in &mut block.stmts { + if let AirStmtKind::Assign { + place: Place::Local(local), + rvalue, + } = &stmt.kind + { + if !referenced.contains(local) { + // Dead store - transform Call into CallVoid (preserves side effects) + if let Rvalue::Call { func, args } = rvalue { + stmt.kind = AirStmtKind::CallVoid { + func: func.clone(), + args: args.clone(), + }; + } + // other rvalues without side effects can be left as-is and will be removed in a second pass + } + } + } + + // remove remaining dead stores (non-Call assigns to dead locals) + block.stmts.retain(|stmt| { + if let AirStmtKind::Assign { + place: Place::Local(local), + .. + } = &stmt.kind + { + referenced.contains(local) + } else { + true + } + }); + } +} + +fn collect_stmt_locals(stmt: &AirStmtKind, out: &mut HashSet) { + match stmt { + AirStmtKind::Assign { place, rvalue } => { + collect_place_locals(place, out); + collect_rvalue_locals(rvalue, out); + } + AirStmtKind::GcAlloc { local, .. } | AirStmtKind::Alloc { local, .. 
} => { + out.insert(*local); + } + AirStmtKind::GcDrop(local) | AirStmtKind::Free(local) => { + out.insert(*local); + } + AirStmtKind::CallVoid { func, args } => { + collect_callee_locals(func, out); + for arg in args { + collect_operand_locals(arg, out); + } + } + AirStmtKind::ArenaCreate(_) + | AirStmtKind::ArenaDestroy(_) + | AirStmtKind::MemoryFence(_) => {} + } +} + +fn collect_terminator_locals(term: &AirTerminator, out: &mut HashSet) { + match term { + AirTerminator::Return(Some(op)) => collect_operand_locals(op, out), + AirTerminator::Branch { cond, .. } => collect_operand_locals(cond, out), + AirTerminator::Switch { discr, .. } => collect_operand_locals(discr, out), + AirTerminator::Invoke { + func, args, ret, .. + } => { + collect_callee_locals(func, out); + for arg in args { + collect_operand_locals(arg, out); + } + collect_place_locals(ret, out); + } + AirTerminator::Return(None) + | AirTerminator::Goto(_) + | AirTerminator::Unwind + | AirTerminator::Unreachable + | AirTerminator::Panic { .. } => {} + } +} + +fn collect_rvalue_locals(rvalue: &Rvalue, out: &mut HashSet) { + match rvalue { + Rvalue::Use(op) | Rvalue::UnaryOp(_, op) | Rvalue::Deref(op) => { + collect_operand_locals(op, out); + } + Rvalue::BinaryOp(_, left, right) => { + collect_operand_locals(left, out); + collect_operand_locals(right, out); + } + Rvalue::Call { func, args } => { + collect_callee_locals(func, out); + for arg in args { + collect_operand_locals(arg, out); + } + } + Rvalue::StructInit { fields, .. } => { + for (_, operand) in fields { + collect_operand_locals(operand, out); + } + } + Rvalue::FieldAccess { base, .. } | Rvalue::Cast { operand: base, .. } => { + collect_operand_locals(base, out); + } + Rvalue::Index { base, index } => { + collect_operand_locals(base, out); + collect_operand_locals(index, out); + } + Rvalue::AddressOf(local) => { + out.insert(*local); + } + Rvalue::EnumInit { payload, .. 
+ } => { + for operand in payload { + collect_operand_locals(operand, out); + } + } + Rvalue::EnumTag { operand, .. } => { + collect_operand_locals(operand, out); + } + Rvalue::EnumPayload { operand, .. } => { + collect_operand_locals(operand, out); + } + Rvalue::ClosureCreate { env, .. } => { + collect_operand_locals(env, out); + } + } +} + +fn collect_place_locals(place: &Place, out: &mut HashSet) { + match place { + Place::Local(_) => { + // this is just a write destination, don't mark as referenced, only Field/Index/Deref need the base local to exist because they read it + } + Place::Field(local, _) | Place::Deref(local) => { + // must read the base to access field/deref + out.insert(*local); + } + Place::Index(local, operand) => { + // same thing + out.insert(*local); + collect_operand_locals(operand, out); // index expression uses this operand + } + } +} + +fn collect_operand_locals(operand: &Operand, out: &mut HashSet) { + match operand { + Operand::Copy(local) | Operand::Move(local) => { + out.insert(*local); + } + Operand::Const(_) => {} + } +} + +fn collect_callee_locals(callee: &Callee, out: &mut HashSet) { + if let Callee::FnPtr(local) = callee { + out.insert(*local); + } +} diff --git a/air/src/passes/mod.rs b/air/src/passes/mod.rs new file mode 100644 index 0000000..fffb98b --- /dev/null +++ b/air/src/passes/mod.rs @@ -0,0 +1,3 @@ +pub mod copy_elim; +pub mod dead_locals; +pub mod validate; diff --git a/air/src/passes/validate.rs b/air/src/passes/validate.rs new file mode 100644 index 0000000..bc0ff78 --- /dev/null +++ b/air/src/passes/validate.rs @@ -0,0 +1,680 @@ +//! AIR validation pass +//! +//! checks structural invariants of the AIR program before codegen +//! +//! this pass does not mutate the program, it just inspects it and +//! collects all violations found +//! +//! some stuff enforced: +//! +//! - no `AirType::Void` on a local unless it is the return position of a void returning function (i.e. 
only the implicit `_0` return local of a `ret_ty == Void` function may be Void) +//! - no `AirType::Opaque` anywhere, because this means an unresolved Dynamic type survived past monomorphization +//! - every basic block has a structurally valid terminator, guaranteed by construction, but we double-check! +//! - every local referenced in operands/places is declared in the function's params or locals list +//! - every block referenced by terminators exists in the function. + +use crate::{ + AirBlock, AirFunction, AirProgram, AirStmtKind, AirTerminator, AirType, BlockId, Callee, + LocalId, Operand, Place, Rvalue, +}; +use std::collections::HashSet; +use std::fmt; + +/// A single validation error with context about where it was found. +#[derive(Debug, Clone)] +pub struct AirValidationError { + pub function_name: String, + pub detail: AirValidationDetail, +} + +#[derive(Debug, Clone)] +pub enum AirValidationDetail { + /// A local has type Void but is not the return local of a void function. + VoidLocal { + local_id: u32, + local_name: Option, + }, + /// A local referenced in the body is not declared. + UndeclaredLocal { local_id: u32, context: String }, + /// A block referenced by a terminator does not exist. + UndeclaredBlock { block_id: u32, context: String }, + /// A local or param has type Opaque (unresolved Dynamic that survived monomorphization). + OpaqueType { + local_id: u32, + local_name: Option, + }, + /// A struct field has type Opaque. + OpaqueStructField { + struct_name: String, + field_name: String, + }, + /// A local or param references an enum definition that is not present in the AIR program. + UnknownEnumType { + local_id: u32, + local_name: Option, + enum_name: String, + }, + /// A struct field references an enum definition that is not present in the AIR program. 
+ UnknownStructFieldEnum { + struct_name: String, + field_name: String, + enum_name: String, + }, + /// An enum operation references an enum definition that is not present in the AIR program. + UnknownEnumReference { + enum_name: String, + context: String, + }, + /// A function has no blocks (non-extern function with empty body). + EmptyBody, +} + +impl fmt::Display for AirValidationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "AIR validation error in `{}`: ", self.function_name)?; + match &self.detail { + AirValidationDetail::VoidLocal { + local_id, + local_name, + } => { + write!(f, "local %{local_id}")?; + if let Some(name) = local_name { + write!(f, " (`{name}`)")?; + } + write!( + f, + " has type Void, which is invalid outside return position" + ) + } + AirValidationDetail::UndeclaredLocal { + local_id, context, .. + } => { + write!(f, "local %{local_id} is used but not declared ({context})") + } + AirValidationDetail::UndeclaredBlock { + block_id, context, .. 
+ } => { + write!( + f, + "block bb{block_id} is referenced but not declared ({context})" + ) + } + AirValidationDetail::OpaqueType { + local_id, + local_name, + } => { + write!(f, "local %{local_id}")?; + if let Some(name) = local_name { + write!(f, " (`{name}`)")?; + } + write!( + f, + " has unresolved Dynamic type (Opaque), inference or monomorphization did not resolve this type" + ) + } + AirValidationDetail::OpaqueStructField { + struct_name, + field_name, + } => { + write!( + f, + "struct `{struct_name}` field `{field_name}` has unresolved Dynamic type (Opaque)" + ) + } + AirValidationDetail::UnknownEnumType { + local_id, + local_name, + enum_name, + } => { + write!(f, "local %{local_id}")?; + if let Some(name) = local_name { + write!(f, " (`{name}`)")?; + } + write!( + f, + " references unknown enum `{enum_name}` after monomorphization" + ) + } + AirValidationDetail::UnknownStructFieldEnum { + struct_name, + field_name, + enum_name, + } => { + write!( + f, + "struct `{struct_name}` field `{field_name}` references unknown enum `{enum_name}` after monomorphization" + ) + } + AirValidationDetail::UnknownEnumReference { enum_name, context } => { + write!( + f, + "enum operation references unknown enum `{enum_name}` after monomorphization ({context})" + ) + } + AirValidationDetail::EmptyBody => { + write!(f, "non-extern function has no basic blocks") + } + } + } +} + +/// Returns true if the type contains Opaque anywhere (including nested in compound types like Array, Ptr, FnPtr, Slice) +fn contains_opaque(ty: &AirType) -> bool { + match ty { + AirType::Opaque => true, + AirType::Ptr(inner) | AirType::Array(inner, _) | AirType::Slice(inner) => { + contains_opaque(inner) + } + AirType::FnPtr { params, ret, .. 
} => { + params.iter().any(contains_opaque) || contains_opaque(ret) + } + _ => false, + } +} + +fn collect_unknown_enum_names(ty: &AirType, known_enums: &HashSet, missing: &mut Vec) { + match ty { + AirType::Enum(name) => { + if !known_enums.contains(name) && !missing.iter().any(|existing| existing == name) { + missing.push(name.clone()); + } + } + AirType::Ptr(inner) | AirType::Array(inner, _) | AirType::Slice(inner) => { + collect_unknown_enum_names(inner, known_enums, missing); + } + AirType::FnPtr { params, ret, .. } => { + for param in params { + collect_unknown_enum_names(param, known_enums, missing); + } + collect_unknown_enum_names(ret, known_enums, missing); + } + _ => {} + } +} + +/// Validate the entire AIR program. Returns `Ok(())` if all invariants hold, or `Err(errors)` with every violation found +pub fn validate_air(program: &AirProgram) -> Result<(), Vec> { + let mut errors = Vec::new(); + let known_enums: HashSet = program.enums.iter().map(|def| def.name.clone()).collect(); + + // Check struct fields for Opaque types. 
+ for def in &program.structs { + for field in &def.fields { + if contains_opaque(&field.ty) { + errors.push(AirValidationError { + function_name: format!("struct {}", def.name), + detail: AirValidationDetail::OpaqueStructField { + struct_name: def.name.clone(), + field_name: field.name.clone(), + }, + }); + } + let mut missing = Vec::new(); + collect_unknown_enum_names(&field.ty, &known_enums, &mut missing); + for enum_name in missing { + errors.push(AirValidationError { + function_name: format!("struct {}", def.name), + detail: AirValidationDetail::UnknownStructFieldEnum { + struct_name: def.name.clone(), + field_name: field.name.clone(), + enum_name, + }, + }); + } + } + } + + for function in &program.functions { + validate_function(function, &known_enums, &mut errors); + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} + +fn validate_function( + function: &AirFunction, + known_enums: &HashSet, + errors: &mut Vec, +) { + // Skip extern declarations, they have no body by design. + if function.is_extern { + return; + } + + // non-extern function must have at least one block check + if function.blocks.is_empty() { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::EmptyBody, + }); + // No point checking locals/blocks if the body is empty + return; + } + + // no Void-typed locals except void return position check + // + // Convention: the return local is local %0 when ret_ty != Void + // + // When ret_ty == Void, local %0 may be Void (it's the implicit return slot) + // + // Any other local with Void type is a bug. 
+ let is_void_return = function.ret_ty == AirType::Void; + + for local in &function.locals { + if local.ty == AirType::Void { + // Allow the return local (%0) of a void-returning function + if is_void_return && local.id == LocalId(0) { + continue; + } + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::VoidLocal { + local_id: local.id.0, + local_name: local.name.clone(), + }, + }); + } + // Reject Opaque types that survived past monomorphization. + if contains_opaque(&local.ty) { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::OpaqueType { + local_id: local.id.0, + local_name: local.name.clone(), + }, + }); + } + let mut missing = Vec::new(); + collect_unknown_enum_names(&local.ty, known_enums, &mut missing); + for enum_name in missing { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::UnknownEnumType { + local_id: local.id.0, + local_name: local.name.clone(), + enum_name, + }, + }); + } + } + + // Also check params for Void and Opaque types. + for param in &function.params { + if param.ty == AirType::Void { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::VoidLocal { + local_id: param.id.0, + local_name: Some(param.name.clone()), + }, + }); + } + if contains_opaque(&param.ty) { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::OpaqueType { + local_id: param.id.0, + local_name: Some(param.name.clone()), + }, + }); + } + let mut missing = Vec::new(); + collect_unknown_enum_names(&param.ty, known_enums, &mut missing); + for enum_name in missing { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::UnknownEnumType { + local_id: param.id.0, + local_name: Some(param.name.clone()), + enum_name, + }, + }); + } + } + + // check return type for Opaque. 
+ if contains_opaque(&function.ret_ty) { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::OpaqueType { + local_id: 0, + local_name: Some("(return type)".to_string()), + }, + }); + } + let mut missing = Vec::new(); + collect_unknown_enum_names(&function.ret_ty, known_enums, &mut missing); + for enum_name in missing { + errors.push(AirValidationError { + function_name: function.name.clone(), + detail: AirValidationDetail::UnknownEnumType { + local_id: 0, + local_name: Some("(return type)".to_string()), + enum_name, + }, + }); + } + + // build declared-locals and declared-blocks sets + let declared_locals: HashSet = function + .params + .iter() + .map(|p| p.id) + .chain(function.locals.iter().map(|l| l.id)) + .collect(); + + let declared_blocks: HashSet = function.blocks.iter().map(|b| b.id).collect(); + + // all referenced locals exist check + for block in &function.blocks { + let block_ctx = format!("bb{}", block.id.0); + check_block_locals( + block, + &declared_locals, + known_enums, + &function.name, + &block_ctx, + errors, + ); + check_block_target_blocks(block, &declared_blocks, &function.name, &block_ctx, errors); + } +} + +fn check_block_locals( + block: &AirBlock, + declared: &HashSet, + known_enums: &HashSet, + func_name: &str, + block_ctx: &str, + errors: &mut Vec, +) { + for (i, stmt) in block.stmts.iter().enumerate() { + let ctx = format!("{block_ctx}, stmt #{i}"); + check_stmt_locals(&stmt.kind, declared, known_enums, func_name, &ctx, errors); + } + let ctx = format!("{block_ctx}, terminator"); + check_terminator_locals( + &block.terminator, + declared, + known_enums, + func_name, + &ctx, + errors, + ); +} + +fn check_stmt_locals( + stmt: &AirStmtKind, + declared: &HashSet, + known_enums: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + match stmt { + AirStmtKind::Assign { place, rvalue } => { + check_place_locals(place, declared, func_name, ctx, errors); + 
check_rvalue_locals(rvalue, declared, known_enums, func_name, ctx, errors); + } + AirStmtKind::GcAlloc { local, .. } | AirStmtKind::Alloc { local, .. } => { + check_local(*local, declared, func_name, ctx, errors); + } + AirStmtKind::GcDrop(local) | AirStmtKind::Free(local) => { + check_local(*local, declared, func_name, ctx, errors); + } + AirStmtKind::CallVoid { func, args } => { + check_callee_locals(func, declared, func_name, ctx, errors); + for arg in args { + check_operand_locals(arg, declared, func_name, ctx, errors); + } + } + AirStmtKind::ArenaCreate(_) + | AirStmtKind::ArenaDestroy(_) + | AirStmtKind::MemoryFence(_) => {} + } +} + +fn check_terminator_locals( + term: &AirTerminator, + declared: &HashSet, + _known_enums: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + match term { + AirTerminator::Return(Some(op)) => { + check_operand_locals(op, declared, func_name, ctx, errors); + } + AirTerminator::Branch { cond, .. } => { + check_operand_locals(cond, declared, func_name, ctx, errors); + } + AirTerminator::Switch { discr, .. } => { + check_operand_locals(discr, declared, func_name, ctx, errors); + } + AirTerminator::Invoke { + func, args, ret, .. + } => { + check_callee_locals(func, declared, func_name, ctx, errors); + for arg in args { + check_operand_locals(arg, declared, func_name, ctx, errors); + } + check_place_locals(ret, declared, func_name, ctx, errors); + } + AirTerminator::Return(None) + | AirTerminator::Goto(_) + | AirTerminator::Unwind + | AirTerminator::Unreachable + | AirTerminator::Panic { .. 
} => {} + } +} + +fn check_rvalue_locals( + rvalue: &Rvalue, + declared: &HashSet, + known_enums: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + match rvalue { + Rvalue::Use(op) | Rvalue::UnaryOp(_, op) | Rvalue::Deref(op) => { + check_operand_locals(op, declared, func_name, ctx, errors); + } + Rvalue::BinaryOp(_, left, right) => { + check_operand_locals(left, declared, func_name, ctx, errors); + check_operand_locals(right, declared, func_name, ctx, errors); + } + Rvalue::Call { func, args } => { + check_callee_locals(func, declared, func_name, ctx, errors); + for arg in args { + check_operand_locals(arg, declared, func_name, ctx, errors); + } + } + Rvalue::StructInit { fields, .. } => { + for (_, operand) in fields { + check_operand_locals(operand, declared, func_name, ctx, errors); + } + } + Rvalue::FieldAccess { base, .. } | Rvalue::Cast { operand: base, .. } => { + check_operand_locals(base, declared, func_name, ctx, errors); + } + Rvalue::Index { base, index } => { + check_operand_locals(base, declared, func_name, ctx, errors); + check_operand_locals(index, declared, func_name, ctx, errors); + } + Rvalue::AddressOf(local) => { + check_local(*local, declared, func_name, ctx, errors); + } + Rvalue::EnumInit { + enum_name, payload, .. 
+ } => { + if !known_enums.contains(enum_name) { + errors.push(AirValidationError { + function_name: func_name.to_string(), + detail: AirValidationDetail::UnknownEnumReference { + enum_name: enum_name.clone(), + context: ctx.to_string(), + }, + }); + } + for operand in payload { + check_operand_locals(operand, declared, func_name, ctx, errors); + } + } + Rvalue::EnumTag { enum_name, operand } => { + if !known_enums.contains(enum_name) { + errors.push(AirValidationError { + function_name: func_name.to_string(), + detail: AirValidationDetail::UnknownEnumReference { + enum_name: enum_name.clone(), + context: ctx.to_string(), + }, + }); + } + check_operand_locals(operand, declared, func_name, ctx, errors); + } + Rvalue::EnumPayload { + enum_name, operand, .. + } => { + if !known_enums.contains(enum_name) { + errors.push(AirValidationError { + function_name: func_name.to_string(), + detail: AirValidationDetail::UnknownEnumReference { + enum_name: enum_name.clone(), + context: ctx.to_string(), + }, + }); + } + check_operand_locals(operand, declared, func_name, ctx, errors); + } + Rvalue::ClosureCreate { env, .. 
} => { + check_operand_locals(env, declared, func_name, ctx, errors); + } + } +} + +fn check_place_locals( + place: &Place, + declared: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + match place { + Place::Local(local) | Place::Field(local, _) | Place::Deref(local) => { + check_local(*local, declared, func_name, ctx, errors); + } + Place::Index(local, operand) => { + check_local(*local, declared, func_name, ctx, errors); + check_operand_locals(operand, declared, func_name, ctx, errors); + } + } +} + +fn check_operand_locals( + operand: &Operand, + declared: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + match operand { + Operand::Copy(local) | Operand::Move(local) => { + check_local(*local, declared, func_name, ctx, errors); + } + Operand::Const(_) => {} + } +} + +fn check_callee_locals( + callee: &Callee, + declared: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + if let Callee::FnPtr(local) = callee { + check_local(*local, declared, func_name, ctx, errors); + } +} + +fn check_local( + local: LocalId, + declared: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + if !declared.contains(&local) { + errors.push(AirValidationError { + function_name: func_name.to_string(), + detail: AirValidationDetail::UndeclaredLocal { + local_id: local.0, + context: ctx.to_string(), + }, + }); + } +} + +// block reference checking +fn check_block_target_blocks( + block: &AirBlock, + declared: &HashSet, + func_name: &str, + block_ctx: &str, + errors: &mut Vec, +) { + let ctx = format!("{block_ctx}, terminator"); + match &block.terminator { + AirTerminator::Goto(target) => { + check_block_ref(*target, declared, func_name, &ctx, errors); + } + AirTerminator::Branch { + then_block, + else_block, + .. + } => { + check_block_ref(*then_block, declared, func_name, &ctx, errors); + check_block_ref(*else_block, declared, func_name, &ctx, errors); + } + AirTerminator::Switch { + targets, default, .. 
+ } => { + for (_, target) in targets { + check_block_ref(*target, declared, func_name, &ctx, errors); + } + check_block_ref(*default, declared, func_name, &ctx, errors); + } + AirTerminator::Invoke { normal, unwind, .. } => { + check_block_ref(*normal, declared, func_name, &ctx, errors); + check_block_ref(*unwind, declared, func_name, &ctx, errors); + } + AirTerminator::Return(_) + | AirTerminator::Unwind + | AirTerminator::Unreachable + | AirTerminator::Panic { .. } => {} + } +} + +fn check_block_ref( + block: BlockId, + declared: &HashSet, + func_name: &str, + ctx: &str, + errors: &mut Vec, +) { + if !declared.contains(&block) { + errors.push(AirValidationError { + function_name: func_name.to_string(), + detail: AirValidationDetail::UndeclaredBlock { + block_id: block.0, + context: ctx.to_string(), + }, + }); + } +} diff --git a/air/src/print.rs b/air/src/print.rs index fd263b7..3c12365 100644 --- a/air/src/print.rs +++ b/air/src/print.rs @@ -19,6 +19,7 @@ pub fn fmt_type(ty: &AirType) -> String { AirType::Void => "void".into(), AirType::Ptr(inner) => format!("*{}", fmt_type(inner)), AirType::Struct(name) => name.clone(), + AirType::Enum(name) => format!("enum {}", name), AirType::Array(inner, len) => format!("[{}; {}]", fmt_type(inner), len), AirType::Slice(inner) => format!("[{}]", fmt_type(inner)), AirType::FnPtr { params, ret, .. 
} => { @@ -26,6 +27,7 @@ pub fn fmt_type(ty: &AirType) -> String { format!("fn({}) -> {}", ps.join(", "), fmt_type(ret)) } AirType::Param(id) => format!("T{}", id.0), + AirType::Opaque => "opaque".into(), } } @@ -63,8 +65,29 @@ pub fn fmt_const(c: &AirConst) -> String { AirConst::Bool(b) => b.to_string(), AirConst::Str(s) => format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")), AirConst::Null => "null".into(), + AirConst::FnRef(name) => format!("fnref @{}", name), + AirConst::Enum { + enum_name, + tag, + payload, + } => { + let payload = payload.iter().map(fmt_const).collect::>().join(", "); + format!("enumconst {}#{}({})", enum_name, tag, payload) + } AirConst::ZeroInit(ty) => format!("zeroinit {}", fmt_type(ty)), AirConst::Undef(ty) => format!("undef {}", fmt_type(ty)), + AirConst::Array(elems) => { + let elems = elems.iter().map(fmt_const).collect::>().join(", "); + format!("[{}]", elems) + } + AirConst::Struct { name, fields } => { + let fields = fields + .iter() + .map(|(f, c)| format!("{}: {}", f, fmt_const(c))) + .collect::>() + .join(", "); + format!("{} {{ {} }}", name, fields) + } } } @@ -216,7 +239,56 @@ fn fmt_rvalue(rv: &Rvalue, func: &AirFunction, program: &AirProgram) -> String { Rvalue::Cast { operand, to, .. 
} => { format!("cast {} -> {}", fmt_operand(operand, func), fmt_type(to)) } - Rvalue::Discriminant(op) => format!("discriminant {}", fmt_operand(op, func)), + Rvalue::Index { base, index } => { + format!( + "index {}[{}]", + fmt_operand(base, func), + fmt_operand(index, func) + ) + } + Rvalue::EnumInit { + enum_name, + variant, + tag, + payload, + } => { + if payload.is_empty() { + format!("enum_init {}::{} (tag={})", enum_name, variant, tag) + } else { + let args: Vec<_> = payload.iter().map(|p| fmt_operand(p, func)).collect(); + format!( + "enum_init {}::{} (tag={}, payload=[{}])", + enum_name, + variant, + tag, + args.join(", ") + ) + } + } + Rvalue::EnumTag { enum_name, operand } => { + format!("enum_tag {} {}", enum_name, fmt_operand(operand, func)) + } + Rvalue::EnumPayload { + enum_name, + tag, + operand, + field_index, + } => { + format!( + "enum_payload {} (tag={}, field={}) {}", + enum_name, + tag, + field_index, + fmt_operand(operand, func) + ) + } + Rvalue::ClosureCreate { fn_name, env } => { + format!( + "closure_create @{} env={}", + fn_name, + fmt_operand(env, func) + ) + } } } diff --git a/backend/Cargo.toml b/backend/Cargo.toml deleted file mode 100644 index dcf9318..0000000 --- a/backend/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "aelys-backend" -version.workspace = true -edition = "2024" - -[dependencies] -aelys-common = { path = "../common" } -aelys-syntax = { path = "../syntax" } -aelys-sema = { path = "../sema" } -aelys-air = { path = "../air" } -aelys-bytecode = { path = "../bytecode" } - -[lib] -doctest = false \ No newline at end of file diff --git a/backend/README.md b/backend/README.md deleted file mode 100644 index 8f5482e..0000000 --- a/backend/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# aelys-backend - -Code generation. Compiles AST to bytecode. 
diff --git a/backend/src/compiler/builtins.rs b/backend/src/compiler/builtins.rs deleted file mode 100644 index 14c3df3..0000000 --- a/backend/src/compiler/builtins.rs +++ /dev/null @@ -1,10 +0,0 @@ -use super::Compiler; - -impl Compiler { - // VM intrinsics - pub const BUILTINS: &'static [&'static str] = - &["alloc", "free", "load", "store", "type", "__tostring"]; - pub fn is_builtin(name: &str) -> bool { - Self::BUILTINS.contains(&name) - } -} diff --git a/backend/src/compiler/call/assign.rs b/backend/src/compiler/call/assign.rs deleted file mode 100644 index 55c04cc..0000000 --- a/backend/src/compiler/call/assign.rs +++ /dev/null @@ -1,158 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::{OpCode, Value}; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use aelys_syntax::ast::{BinaryOp, Expr, ExprKind}; - -impl Compiler { - pub fn compile_assign(&mut self, name: &str, value: &Expr, dest: u8, span: Span) -> Result<()> { - if let Some((reg, mutable)) = self.resolve_variable(name) { - if self.loop_variables.contains(&name.to_string()) { - return Err(CompileError::new( - CompileErrorKind::AssignToLoopVariable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - - if let ExprKind::Binary { - left, - op: BinaryOp::Add, - right, - } = &value.kind - { - if let (ExprKind::Identifier(id), ExprKind::Int(n)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::AddI, reg, reg, *n as u8, span); - if reg != dest { - self.emit_a(OpCode::Move, dest, reg, 0, span); - } - return Ok(()); - } - if let (ExprKind::Int(n), ExprKind::Identifier(id)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::AddI, reg, reg, *n as u8, span); - if reg != dest { - 
self.emit_a(OpCode::Move, dest, reg, 0, span); - } - return Ok(()); - } - } - - if let ExprKind::Binary { - left, - op: BinaryOp::Sub, - right, - } = &value.kind - && let (ExprKind::Identifier(id), ExprKind::Int(n)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::SubI, reg, reg, *n as u8, span); - if reg != dest { - self.emit_a(OpCode::Move, dest, reg, 0, span); - } - return Ok(()); - } - - self.compile_expr(value, reg)?; - if reg != dest { - self.emit_a(OpCode::Move, dest, reg, 0, span); - } - - Ok(()) - } else if let Some((upvalue_idx, mutable)) = self.resolve_upvalue(name) { - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - - self.compile_expr(value, dest)?; - self.emit_a(OpCode::SetUpval, upvalue_idx, dest, 0, span); - - Ok(()) - } else if let Some(&mutable) = self.globals.get(name) { - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - - if !self.global_indices.contains_key(name) - && let ExprKind::Binary { - left, - op: BinaryOp::Add, - right, - } = &value.kind - { - if let (ExprKind::Identifier(id), ExprKind::Int(n)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - let name_ref = self.heap.intern_string(name); - let const_idx = self.add_constant(Value::ptr(name_ref.index()), span)?; - self.emit_c(OpCode::IncGlobalI, dest, const_idx as u8, *n as u8, span); - return Ok(()); - } - if let (ExprKind::Int(n), ExprKind::Identifier(id)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - let name_ref = self.heap.intern_string(name); - let const_idx = self.add_constant(Value::ptr(name_ref.index()), span)?; - self.emit_c(OpCode::IncGlobalI, dest, const_idx as u8, *n as u8, span); - return Ok(()); - } - } - - self.compile_expr(value, dest)?; - - let idx = 
if let Some(&idx) = self.global_indices.get(name) { - idx - } else { - let idx = self.next_global_index; - self.global_indices.insert(name.to_string(), idx); - self.next_global_index += 1; - idx - }; - self.accessed_globals.insert(name.to_string()); - self.emit_b(OpCode::SetGlobalIdx, dest, idx as i16, span); - - Ok(()) - } else { - Err(CompileError::new( - CompileErrorKind::UndefinedVariable(name.to_string()), - span, - self.source.clone(), - ) - .into()) - } - } -} diff --git a/backend/src/compiler/call/builtins.rs b/backend/src/compiler/call/builtins.rs deleted file mode 100644 index f98a0c4..0000000 --- a/backend/src/compiler/call/builtins.rs +++ /dev/null @@ -1,134 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind}; - -impl Compiler { - pub fn try_compile_builtin_call( - &mut self, - callee: &Expr, - args: &[Expr], - dest: u8, - span: Span, - ) -> Result { - let name = match &callee.kind { - ExprKind::Identifier(name) => name.as_str(), - _ => return Ok(false), - }; - - match name { - "alloc" => { - if args.len() != 1 { - return Ok(false); - } - self.compile_builtin_alloc(&args[0], dest, span)?; - Ok(true) - } - "free" => { - if args.len() != 1 { - return Ok(false); - } - self.compile_builtin_free(&args[0], dest, span)?; - Ok(true) - } - "load" => { - if args.len() != 2 { - return Ok(false); - } - self.compile_builtin_load(&args[0], &args[1], dest, span)?; - Ok(true) - } - "store" => { - if args.len() != 3 { - return Ok(false); - } - self.compile_builtin_store(&args[0], &args[1], &args[2], dest, span)?; - Ok(true) - } - _ => Ok(false), - } - } - - fn compile_builtin_alloc(&mut self, size_expr: &Expr, dest: u8, span: Span) -> Result<()> { - let size_reg = self.alloc_register()?; - self.compile_expr(size_expr, size_reg)?; - self.emit_a(OpCode::Alloc, dest, size_reg, 0, span); - self.free_register(size_reg); - Ok(()) - } - - fn compile_builtin_free(&mut 
self, ptr_expr: &Expr, dest: u8, span: Span) -> Result<()> { - let ptr_reg = self.alloc_register()?; - self.compile_expr(ptr_expr, ptr_reg)?; - self.emit_a(OpCode::Free, ptr_reg, 0, 0, span); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - Ok(()) - } - - fn compile_builtin_load( - &mut self, - ptr_expr: &Expr, - offset_expr: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - if let ExprKind::Int(offset) = &offset_expr.kind - && *offset >= 0 - && *offset <= 255 - { - let ptr_reg = self.alloc_register()?; - self.compile_expr(ptr_expr, ptr_reg)?; - self.emit_a(OpCode::LoadMemI, dest, ptr_reg, *offset as u8, span); - self.free_register(ptr_reg); - return Ok(()); - } - - let ptr_reg = self.alloc_register()?; - let offset_reg = self.alloc_register()?; - self.compile_expr(ptr_expr, ptr_reg)?; - self.compile_expr(offset_expr, offset_reg)?; - self.emit_a(OpCode::LoadMem, dest, ptr_reg, offset_reg, span); - self.free_register(offset_reg); - self.free_register(ptr_reg); - Ok(()) - } - - fn compile_builtin_store( - &mut self, - ptr_expr: &Expr, - offset_expr: &Expr, - value_expr: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - if let ExprKind::Int(offset) = &offset_expr.kind - && *offset >= 0 - && *offset <= 255 - { - let ptr_reg = self.alloc_register()?; - let val_reg = self.alloc_register()?; - self.compile_expr(ptr_expr, ptr_reg)?; - self.compile_expr(value_expr, val_reg)?; - self.emit_a(OpCode::StoreMemI, ptr_reg, *offset as u8, val_reg, span); - self.free_register(val_reg); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - return Ok(()); - } - - let ptr_reg = self.alloc_register()?; - let offset_reg = self.alloc_register()?; - let val_reg = self.alloc_register()?; - self.compile_expr(ptr_expr, ptr_reg)?; - self.compile_expr(offset_expr, offset_reg)?; - self.compile_expr(value_expr, val_reg)?; - self.emit_a(OpCode::StoreMem, ptr_reg, offset_reg, val_reg, span); - self.free_register(val_reg); - 
self.free_register(offset_reg); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - Ok(()) - } -} diff --git a/backend/src/compiler/call/dispatch.rs b/backend/src/compiler/call/dispatch.rs deleted file mode 100644 index 645e9d6..0000000 --- a/backend/src/compiler/call/dispatch.rs +++ /dev/null @@ -1,178 +0,0 @@ -use super::super::Compiler; -use super::util::arg_range_available; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind, FmtStringPart}; - -impl Compiler { - // Call dispatch: try specialized opcodes first, fall back to generic Call. - // Order matters - upvalue calls are fastest (no lookup), then module (cached), - // then global (cached), then generic (full lookup every time). - pub fn compile_call( - &mut self, - callee: &Expr, - args: &[Expr], - dest: u8, - span: Span, - ) -> Result<()> { - if args.len() > 255 { - return Err(CompileError::new( - CompileErrorKind::TooManyArguments, - span, - self.source.clone(), - ) - .into()); - } - - // Handle format string with placeholders: func("x={}", x) -> func("x=" + __tostring(x)) - if let Some((fmt_parts, placeholder_count)) = Self::get_fmt_string_placeholders(args) - && placeholder_count > 0 - { - return self.compile_call_with_fmt_placeholders( - callee, - args, - fmt_parts, - placeholder_count, - dest, - span, - ); - } - - // each of these returns true if it handled the call - // builtins first - fastest path for alloc/free/load/store - if self.try_compile_builtin_call(callee, args, dest, span)? { - return Ok(()); - } - - if self.try_compile_upvalue_call(callee, args, dest, span)? { - return Ok(()); - } - - if self.try_compile_module_call(callee, args, dest, span)? { - return Ok(()); - } - - if self.try_compile_global_call(callee, args, dest, span)? 
{ - return Ok(()); - } - - // nothing matched - use the slow path - self.compile_call_generic(callee, args, dest, span) - } - - fn get_fmt_string_placeholders(args: &[Expr]) -> Option<(&[FmtStringPart], usize)> { - if args.is_empty() { - return None; - } - if let ExprKind::FmtString(parts) = &args[0].kind { - let count = parts - .iter() - .filter(|p| matches!(p, FmtStringPart::Placeholder)) - .count(); - return Some((parts, count)); - } - None - } - - fn compile_call_with_fmt_placeholders( - &mut self, - callee: &Expr, - args: &[Expr], - fmt_parts: &[FmtStringPart], - placeholder_count: usize, - dest: u8, - span: Span, - ) -> Result<()> { - let extra_args_needed = placeholder_count; - let extra_args_available = args.len() - 1; - - if extra_args_available < extra_args_needed { - return Err(CompileError::new( - CompileErrorKind::TypeInferenceError(format!( - "format string has {} placeholder(s) but only {} argument(s) provided", - extra_args_needed, extra_args_available - )), - span, - self.source.clone(), - ) - .into()); - } - - let fmt_extra_args = &args[1..1 + extra_args_needed]; - let remaining_args = &args[1 + extra_args_needed..]; - - // Compile: func(fmt_string_expanded, remaining_args...) 
- let total_args = 1 + remaining_args.len(); - let func_reg = self.alloc_consecutive_registers_for_call(total_args as u8 + 1, span)?; - - for i in 0..=total_args { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - self.compile_expr(callee, func_reg)?; - - // first arg: the expanded format string - let fmt_reg = func_reg + 1; - self.compile_fmt_string(fmt_parts, fmt_extra_args, fmt_reg, args[0].span)?; - - // remaining args - for (i, arg) in remaining_args.iter().enumerate() { - let arg_reg = func_reg + 2 + i as u8; - self.compile_expr(arg, arg_reg)?; - } - - self.emit_c( - aelys_bytecode::OpCode::Call, - dest, - func_reg, - total_args as u8, - span, - ); - - for i in (0..=total_args).rev() { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - pub(super) fn reserve_arg_registers(&mut self, start: u8, args_len: usize) -> bool { - if !arg_range_available(&self.register_pool, start, args_len) { - return false; - } - for i in 0..args_len { - let arg_reg = start + i as u8; - self.register_pool[arg_reg as usize] = true; - if arg_reg >= self.next_register { - self.next_register = arg_reg + 1; - } - } - true - } - - pub(super) fn release_arg_registers(&mut self, start: u8, args_len: usize) { - for i in (0..args_len).rev() { - let arg_reg = start + i as u8; - self.register_pool[arg_reg as usize] = false; - } - } - - pub(super) fn checked_arg_start(&self, dest: u8) -> Option { - dest.checked_add(1) - } - - pub(super) fn is_member_call(callee: &Expr) -> Option<(&str, &str)> { - if let ExprKind::Member { object, member } = &callee.kind - && let ExprKind::Identifier(module_name) = &object.kind - { - return Some((module_name, member)); - } - None - } -} diff --git a/backend/src/compiler/call/generic.rs b/backend/src/compiler/call/generic.rs deleted file mode 100644 index 838aff3..0000000 --- a/backend/src/compiler/call/generic.rs +++ 
/dev/null @@ -1,42 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::Expr; - -impl Compiler { - pub fn compile_call_generic( - &mut self, - callee: &Expr, - args: &[Expr], - dest: u8, - span: Span, - ) -> Result<()> { - let nargs = args.len(); - let func_reg = self.alloc_consecutive_registers_for_call(nargs as u8 + 1, span)?; - - for i in 0..=nargs { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - self.compile_expr(callee, func_reg)?; - - for (i, arg) in args.iter().enumerate() { - let arg_reg = func_reg + 1 + i as u8; - self.compile_expr(arg, arg_reg)?; - } - - self.emit_c(OpCode::Call, dest, func_reg, args.len() as u8, span); - - for i in (0..=nargs).rev() { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } -} diff --git a/backend/src/compiler/call/global.rs b/backend/src/compiler/call/global.rs deleted file mode 100644 index f490c8a..0000000 --- a/backend/src/compiler/call/global.rs +++ /dev/null @@ -1,69 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind}; - -impl Compiler { - // CallGlobal optimization: if we know it's a global function call, we can - // skip the local/upvalue lookup entirely and use a cached global index. - // Big win for stdlib calls like print(), len(), etc. 
- pub(super) fn try_compile_global_call( - &mut self, - callee: &Expr, - args: &[Expr], - dest: u8, - span: Span, - ) -> Result { - if let ExprKind::Identifier(name) = &callee.kind - // only use this path if the name isn't shadowed by a local or upvalue - && self.resolve_variable(name).is_none() && self.resolve_upvalue(name).is_none() - { - let global_idx = if let Some(&idx) = self.global_indices.get(name) { - idx - } else if self.globals.contains_key(name) - || Self::is_builtin(name) - || self.known_globals.contains(name) - { - let idx = self.next_global_index; - self.global_indices.insert(name.to_string(), idx); - self.next_global_index += 1; - idx - } else { - return Ok(false); - }; - - self.accessed_globals.insert(name.to_string()); - - // 255 limit: index must fit in a byte for CallGlobal encoding - // TODO: could extend to 16-bit indices with a new opcode if needed - if global_idx <= 255 { - let arg_start = match self.checked_arg_start(dest) { - Some(s) => s, - None => { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - }; - - if !self.reserve_arg_registers(arg_start, args.len()) { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached(dest, global_idx as u8, args.len() as u8, name, span); - self.release_arg_registers(arg_start, args.len()); - return Ok(true); - } - - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - - Ok(false) - } -} diff --git a/backend/src/compiler/call/member.rs b/backend/src/compiler/call/member.rs deleted file mode 100644 index a48c703..0000000 --- a/backend/src/compiler/call/member.rs +++ /dev/null @@ -1,56 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use 
aelys_syntax::ast::{Expr, ExprKind}; - -impl Compiler { - pub fn compile_member_access( - &mut self, - object: &Expr, - member: &str, - dest: u8, - span: Span, - ) -> Result<()> { - if let ExprKind::Identifier(module_name) = &object.kind - && self.module_aliases.contains(module_name) - { - let qualified_name = format!("{}::{}", module_name, member); - - let idx = if let Some(&idx) = self.global_indices.get(&qualified_name) { - idx - } else { - let idx = self.next_global_index; - self.global_indices.insert(qualified_name.clone(), idx); - self.next_global_index += 1; - idx - }; - - self.accessed_globals.insert(qualified_name); - self.emit_b(OpCode::GetGlobalIdx, dest, idx as i16, span); - return Ok(()); - } - - if Self::is_builtin(member) { - let idx = if let Some(&idx) = self.global_indices.get(member) { - idx - } else { - let idx = self.next_global_index; - self.global_indices.insert(member.to_string(), idx); - self.next_global_index += 1; - idx - }; - self.accessed_globals.insert(member.to_string()); - self.emit_b(OpCode::GetGlobalIdx, dest, idx as i16, span); - Ok(()) - } else { - Err(CompileError::new( - CompileErrorKind::UndefinedVariable(member.to_string()), - span, - self.source.clone(), - ) - .into()) - } - } -} diff --git a/backend/src/compiler/call/mod.rs b/backend/src/compiler/call/mod.rs deleted file mode 100644 index aef3f9d..0000000 --- a/backend/src/compiler/call/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod assign; -mod builtins; -mod dispatch; -mod generic; -mod global; -mod member; -mod module_call; -mod upvalue; -mod util; diff --git a/backend/src/compiler/call/module_call.rs b/backend/src/compiler/call/module_call.rs deleted file mode 100644 index a167b93..0000000 --- a/backend/src/compiler/call/module_call.rs +++ /dev/null @@ -1,57 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::Expr; - -impl Compiler { - pub(super) fn try_compile_module_call( - &mut self, - callee: &Expr, - args: 
&[Expr], - dest: u8, - span: Span, - ) -> Result { - if let Some((module_name, member)) = Self::is_member_call(callee) - && self.module_aliases.contains(module_name) - { - let qualified_name = format!("{}::{}", module_name, member); - let global_idx = self.get_or_create_global_index(&qualified_name); - self.accessed_globals.insert(qualified_name.clone()); - - if global_idx <= 255 { - let arg_start = match self.checked_arg_start(dest) { - Some(s) => s, - None => { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - }; - - if !self.reserve_arg_registers(arg_start, args.len()) { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached( - dest, - global_idx as u8, - args.len() as u8, - &qualified_name, - span, - ); - self.release_arg_registers(arg_start, args.len()); - return Ok(true); - } - - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - - Ok(false) - } -} diff --git a/backend/src/compiler/call/upvalue.rs b/backend/src/compiler/call/upvalue.rs deleted file mode 100644 index 72631de..0000000 --- a/backend/src/compiler/call/upvalue.rs +++ /dev/null @@ -1,48 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind}; - -impl Compiler { - // CallUpval: for calling functions captured from outer scopes. - // this is the fast path for recursive closures, the function is already in the upvalue array, no name lookup needed at runtime. - pub(super) fn try_compile_upvalue_call( - &mut self, - callee: &Expr, - args: &[Expr], - dest: u8, - span: Span, - ) -> Result { - if let ExprKind::Identifier(name) = &callee.kind { - // Not a local? 
Check if it's captured from an enclosing scope - if self.resolve_variable(name).is_none() - && let Some((upval_idx, _mutable)) = self.resolve_upvalue(name) - { - let arg_start = match self.checked_arg_start(dest) { - Some(s) => s, - None => { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - }; - - if !self.reserve_arg_registers(arg_start, args.len()) { - self.compile_call_generic(callee, args, dest, span)?; - return Ok(true); - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_expr(arg, arg_reg)?; - } - - self.emit_a(OpCode::CallUpval, dest, upval_idx, args.len() as u8, span); - self.release_arg_registers(arg_start, args.len()); - return Ok(true); - } - } - - Ok(false) - } -} diff --git a/backend/src/compiler/call/util.rs b/backend/src/compiler/call/util.rs deleted file mode 100644 index b39de27..0000000 --- a/backend/src/compiler/call/util.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub(super) fn arg_range_available(register_pool: &[bool; 256], start: u8, args_len: usize) -> bool { - for i in 0..args_len { - let arg_reg = match start.checked_add(i as u8) { - Some(r) => r, - None => return false, - }; - if (arg_reg as usize) >= register_pool.len() || register_pool[arg_reg as usize] { - return false; - } - } - true -} diff --git a/backend/src/compiler/constructors.rs b/backend/src/compiler/constructors.rs deleted file mode 100644 index f6e73e3..0000000 --- a/backend/src/compiler/constructors.rs +++ /dev/null @@ -1,199 +0,0 @@ -use super::state::{Compiler, Local, Upvalue}; -use aelys_bytecode::{Function, Heap}; -use aelys_syntax::Source; -use std::collections::{HashMap, HashSet}; -use std::rc::Rc; -use std::sync::Arc; - -impl Compiler { - pub fn new(name: Option, source: Arc) -> Self { - Self { - current: Function::new(name, 0), - source, - scopes: Vec::new(), - locals: Vec::new(), - upvalues: Vec::new(), - enclosing_locals: None, - enclosing_upvalues: None, - all_enclosing_locals: Vec::new(), - 
loop_stack: Vec::new(), - loop_variables: Vec::new(), - scope_depth: 0, - next_register: 0, - has_no_gc: false, - heap: Heap::new(), - register_pool: [false; 256], - globals: HashMap::new(), - global_indices: HashMap::new(), - next_global_index: 0, - module_aliases: Rc::new(HashSet::new()), - known_globals: Rc::new(HashSet::new()), - known_native_globals: Rc::new(HashSet::new()), - symbol_origins: Rc::new(HashMap::new()), - accessed_globals: HashSet::new(), - next_call_site_slot: 0, - function_depth: 0, - } - } - - // for REPL or nested fns - pub fn with_heap_and_globals( - name: Option, - source: Arc, - heap: Heap, - globals: HashMap, - ) -> Self { - Self { - current: Function::new(name, 0), - source, - scopes: Vec::new(), - locals: Vec::new(), - upvalues: Vec::new(), - enclosing_locals: None, - enclosing_upvalues: None, - all_enclosing_locals: Vec::new(), - loop_stack: Vec::new(), - loop_variables: Vec::new(), - scope_depth: 0, - next_register: 0, - has_no_gc: false, - heap, - register_pool: [false; 256], - globals, - global_indices: HashMap::new(), - next_global_index: 0, - module_aliases: Rc::new(HashSet::new()), - known_globals: Rc::new(HashSet::new()), - known_native_globals: Rc::new(HashSet::new()), - symbol_origins: Rc::new(HashMap::new()), - accessed_globals: HashSet::new(), - next_call_site_slot: 0, - function_depth: 0, - } - } - - #[allow(clippy::too_many_arguments)] - pub fn for_nested_function( - name: Option, - source: Arc, - heap: Heap, - globals: HashMap, - global_indices: HashMap, - next_global_index: u16, - enclosing_locals: Vec, - enclosing_upvalues: Vec, - parent_all_enclosing_locals: Vec>, - module_aliases: Rc>, - known_globals: Rc>, - known_native_globals: Rc>, - symbol_origins: Rc>, - next_call_site_slot: u16, - ) -> Self { - let mut all_enclosing_locals = vec![enclosing_locals.clone()]; - all_enclosing_locals.extend(parent_all_enclosing_locals); - - Self { - current: Function::new(name, 0), - source, - scopes: Vec::new(), - locals: 
Vec::new(), - upvalues: Vec::new(), - enclosing_locals: Some(enclosing_locals), - enclosing_upvalues: Some(enclosing_upvalues), - all_enclosing_locals, - loop_stack: Vec::new(), - loop_variables: Vec::new(), - scope_depth: 0, - next_register: 0, - has_no_gc: false, - heap, - register_pool: [false; 256], - globals, - global_indices, - next_global_index, - module_aliases, - known_globals, - known_native_globals, - symbol_origins, - accessed_globals: HashSet::new(), - next_call_site_slot, - function_depth: 1, - } - } - - pub fn with_modules( - name: Option, - source: Arc, - module_aliases: HashSet, - known_globals: HashSet, - known_native_globals: HashSet, - symbol_origins: HashMap, - ) -> Self { - Self { - current: Function::new(name, 0), - source, - scopes: Vec::new(), - locals: Vec::new(), - upvalues: Vec::new(), - enclosing_locals: None, - enclosing_upvalues: None, - all_enclosing_locals: Vec::new(), - loop_stack: Vec::new(), - loop_variables: Vec::new(), - scope_depth: 0, - next_register: 0, - has_no_gc: false, - heap: Heap::new(), - register_pool: [false; 256], - globals: HashMap::new(), - global_indices: HashMap::new(), - next_global_index: 0, - module_aliases: Rc::new(module_aliases), - known_globals: Rc::new(known_globals), - known_native_globals: Rc::new(known_native_globals), - symbol_origins: Rc::new(symbol_origins), - accessed_globals: HashSet::new(), - next_call_site_slot: 0, - function_depth: 0, - } - } - - // REPL + modules - pub fn with_modules_and_globals( - name: Option, - source: Arc, - module_aliases: HashSet, - known_globals: HashSet, - known_native_globals: HashSet, - symbol_origins: HashMap, - globals: HashMap, - ) -> Self { - Self { - current: Function::new(name, 0), - source, - scopes: Vec::new(), - locals: Vec::new(), - upvalues: Vec::new(), - enclosing_locals: None, - enclosing_upvalues: None, - all_enclosing_locals: Vec::new(), - loop_stack: Vec::new(), - loop_variables: Vec::new(), - scope_depth: 0, - next_register: 0, - has_no_gc: false, 
- heap: Heap::new(), - register_pool: [false; 256], - globals, - global_indices: HashMap::new(), - next_global_index: 0, - module_aliases: Rc::new(module_aliases), - known_globals: Rc::new(known_globals), - known_native_globals: Rc::new(known_native_globals), - symbol_origins: Rc::new(symbol_origins), - accessed_globals: HashSet::new(), - next_call_site_slot: 0, - function_depth: 0, - } - } -} diff --git a/backend/src/compiler/emit.rs b/backend/src/compiler/emit.rs deleted file mode 100644 index fa26a6c..0000000 --- a/backend/src/compiler/emit.rs +++ /dev/null @@ -1,99 +0,0 @@ -use super::Compiler; -use aelys_bytecode::{OpCode, Value}; -use aelys_common::Result; -use aelys_syntax::Span; - -// bytecode emission wrappers - add line info for debug - -impl Compiler { - #[inline] - pub fn current_line(&self, span: Span) -> u32 { - span.line - } - - // format A: op | a | b | c (3 regs) - pub fn emit_a(&mut self, op: OpCode, a: u8, b: u8, c: u8, span: Span) { - self.current.emit_a(op, a, b, c, self.current_line(span)); - } - - // format B: op | a | imm16 - pub fn emit_b(&mut self, op: OpCode, a: u8, imm: i16, span: Span) { - self.current.emit_b(op, a, imm, self.current_line(span)); - } - - pub fn emit_c(&mut self, op: OpCode, dest: u8, func: u8, nargs: u8, span: Span) { - self.current - .emit_c(op, dest, func, nargs, self.current_line(span)); - } - - pub fn emit_jump(&mut self, op: OpCode, span: Span) -> usize { - self.current.emit_jump(op, self.current_line(span)) - } - - pub fn emit_jump_if(&mut self, op: OpCode, reg: u8, span: Span) -> usize { - self.current.emit_jump_if(op, reg, self.current_line(span)) - } - - pub fn patch_jump(&mut self, offset: usize) { - self.current.patch_jump(offset); - } - - pub fn emit_return0(&mut self, span: Span) { - self.current - .emit_a(OpCode::Return0, 0, 0, 0, self.current_line(span)); - } - - pub fn current_offset(&self) -> usize { - self.current.current_offset() - } - - pub fn add_constant(&mut self, value: Value, _span: Span) -> 
Result { - let idx = self.current.add_constant(value); - Ok(idx) - } - - // inline cache: [op|dest|idx|nargs] [cache_lo] [cache_hi|slot_id] - // known natives skip runtime patching - pub fn emit_call_global_cached( - &mut self, - dest: u8, - global_idx: u8, - nargs: u8, - global_name: &str, - span: Span, - ) { - let line = self.current_line(span); - - // Check if this is a known native function (builtin or stdlib) - let is_known_native = - Self::is_builtin(global_name) || self.known_native_globals.contains(global_name); - - if is_known_native { - // Emit CallGlobalNative directly - no runtime patching needed - self.current - .emit_a(OpCode::CallGlobalNative, dest, global_idx, nargs, line); - - // Emit cache word 1: initially 0 (to be populated on first call) - self.current.push_raw(0); - - // Emit cache word 2: initially 0 (arity will be populated on first call) - self.current.push_raw(0); - } else { - // Unknown global type - emit CallGlobal for runtime patching - let slot_id = self.next_call_site_slot; - self.next_call_site_slot += 1; - - self.current - .emit_a(OpCode::CallGlobal, dest, global_idx, nargs, line); - - // Emit cache word 1: initially 0 (no cached ptr yet) - self.current.push_raw(0); - - // Emit cache word 2: slot_id in lower 16 bits - self.current.push_raw(slot_id as u32); - } - - // Record line info for the cache words (same line as the instruction) - self.current.record_lines(2, line); - } -} diff --git a/backend/src/compiler/expr/array.rs b/backend/src/compiler/expr/array.rs deleted file mode 100644 index 192470d..0000000 --- a/backend/src/compiler/expr/array.rs +++ /dev/null @@ -1,161 +0,0 @@ -use crate::compiler::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, TypeAnnotation}; - -impl Compiler { - pub fn compile_array_sized( - &mut self, - _element_type: &Option, - size: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - // For now, compile as ArrayNewP with size from 
register - // TODO: Use ArrayFill opcode when added - let size_reg = self.alloc_register()?; - self.compile_expr(size, size_reg)?; - self.emit_a(OpCode::ArrayNewP, dest, size_reg, 0, span); - self.free_register(size_reg); - Ok(()) - } - - pub fn compile_array_literal(&mut self, elements: &[Expr], dest: u8, span: Span) -> Result<()> { - let count = elements.len(); - - if count == 0 { - self.emit_a(OpCode::ArrayNewP, dest, 0, 0, span); - return Ok(()); - } - - let start_reg = self.alloc_consecutive_registers_for_call(count as u8, span)?; - - for i in 0..count { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - for (i, elem) in elements.iter().enumerate() { - let elem_reg = start_reg + i as u8; - self.compile_expr(elem, elem_reg)?; - } - - self.emit_a(OpCode::ArrayLit, dest, start_reg, count as u8, span); - - for i in (0..count).rev() { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - pub fn compile_vec_literal(&mut self, elements: &[Expr], dest: u8, span: Span) -> Result<()> { - let count = elements.len(); - - if count == 0 { - self.emit_a(OpCode::VecNewP, dest, 0, 0, span); - return Ok(()); - } - - let start_reg = self.alloc_consecutive_registers_for_call(count as u8, span)?; - - for i in 0..count { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - for (i, elem) in elements.iter().enumerate() { - let elem_reg = start_reg + i as u8; - self.compile_expr(elem, elem_reg)?; - } - - self.emit_a(OpCode::VecLit, dest, start_reg, count as u8, span); - - for i in (0..count).rev() { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - pub fn compile_index_access( - &mut self, - object: &Expr, - index: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = 
self.alloc_register()?; - self.compile_expr(object, obj_reg)?; - - let idx_reg = self.alloc_register()?; - self.compile_expr(index, idx_reg)?; - - self.emit_a(OpCode::ArrayLoadP, dest, obj_reg, idx_reg, span); - - self.free_register(idx_reg); - self.free_register(obj_reg); - - Ok(()) - } - - pub fn compile_index_assign( - &mut self, - object: &Expr, - index: &Expr, - value: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_expr(object, obj_reg)?; - - let idx_reg = self.alloc_register()?; - self.compile_expr(index, idx_reg)?; - - let val_reg = self.alloc_register()?; - self.compile_expr(value, val_reg)?; - - self.emit_a(OpCode::ArrayStoreP, obj_reg, idx_reg, val_reg, span); - - if dest != val_reg { - self.emit_a(OpCode::Move, dest, val_reg, 0, span); - } - - self.free_register(val_reg); - self.free_register(idx_reg); - self.free_register(obj_reg); - - Ok(()) - } - - pub fn compile_slice( - &mut self, - _object: &Expr, - _range: &Expr, - _dest: u8, - _span: Span, - ) -> Result<()> { - todo!("slice") - } - - pub fn compile_range( - &mut self, - _start: &Option>, - _end: &Option>, - _inclusive: bool, - _dest: u8, - _span: Span, - ) -> Result<()> { - todo!("range") - } -} diff --git a/backend/src/compiler/expr/binary.rs b/backend/src/compiler/expr/binary.rs deleted file mode 100644 index 7eafa5a..0000000 --- a/backend/src/compiler/expr/binary.rs +++ /dev/null @@ -1,142 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{BinaryOp, Expr, ExprKind}; - -impl Compiler { - // Binary ops have a bunch of special cases for immediate values. - // `x + 5` becomes AddI instead of LoadK + Add, saves a register and an instruction. 
- pub fn compile_binary( - &mut self, - left: &Expr, - op: BinaryOp, - right: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - // Right operand is small constant: use immediate instructions - if let ExprKind::Int(n) = &right.kind - && *n >= 0 - && *n <= 255 - && let Some(left_reg) = self.get_local_register(left) - { - match op { - BinaryOp::Add => { - self.emit_a(OpCode::AddI, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::Sub => { - self.emit_a(OpCode::SubI, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::Shl => { - self.emit_a(OpCode::ShlIImm, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::Shr => { - self.emit_a(OpCode::ShrIImm, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::BitAnd => { - self.emit_a(OpCode::AndIImm, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::BitOr => { - self.emit_a(OpCode::OrIImm, dest, left_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::BitXor => { - self.emit_a(OpCode::XorIImm, dest, left_reg, *n as u8, span); - return Ok(()); - } - _ => {} - } - } - - // Commutative ops: `5 + x` can also use AddI (swap operands) - if let ExprKind::Int(n) = &left.kind - && *n >= 0 - && *n <= 255 - && op == BinaryOp::Add - && let Some(right_reg) = self.get_local_register(right) - { - self.emit_a(OpCode::AddI, dest, right_reg, *n as u8, span); - return Ok(()); - } - - // Same for bitwise ops - they're all commutative - if let ExprKind::Int(n) = &left.kind - && *n >= 0 - && *n <= 255 - && let Some(right_reg) = self.get_local_register(right) - { - match op { - BinaryOp::BitAnd => { - self.emit_a(OpCode::AndIImm, dest, right_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::BitOr => { - self.emit_a(OpCode::OrIImm, dest, right_reg, *n as u8, span); - return Ok(()); - } - BinaryOp::BitXor => { - self.emit_a(OpCode::XorIImm, dest, right_reg, *n as u8, span); - return Ok(()); - } - _ => {} - } - } - - // Generic path: evaluate both sides into registers. 
- // Reuse existing registers when possible to avoid spilling. - let left_local = self.get_local_register(left); - let right_local = self.get_local_register(right); - - let (left_reg, left_allocated) = match left_local { - Some(r) => (r, false), - None => (self.alloc_register()?, true), - }; - - let (right_reg, right_allocated) = match right_local { - Some(r) => (r, false), - None => (self.alloc_register()?, true), - }; - - if left_allocated { - self.compile_expr(left, left_reg)?; - } - if right_allocated { - self.compile_expr(right, right_reg)?; - } - - let opcode = match op { - BinaryOp::Add => OpCode::Add, - BinaryOp::Sub => OpCode::Sub, - BinaryOp::Mul => OpCode::Mul, - BinaryOp::Div => OpCode::Div, - BinaryOp::Mod => OpCode::Mod, - BinaryOp::Lt => OpCode::Lt, - BinaryOp::Le => OpCode::Le, - BinaryOp::Gt => OpCode::Gt, - BinaryOp::Ge => OpCode::Ge, - BinaryOp::Eq => OpCode::Eq, - BinaryOp::Ne => OpCode::Ne, - BinaryOp::Shl => OpCode::Shl, - BinaryOp::Shr => OpCode::Shr, - BinaryOp::BitAnd => OpCode::BitAnd, - BinaryOp::BitOr => OpCode::BitOr, - BinaryOp::BitXor => OpCode::BitXor, - }; - self.emit_a(opcode, dest, left_reg, right_reg, span); - - if right_allocated { - self.free_register(right_reg); - } - if left_allocated { - self.free_register(left_reg); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/control.rs b/backend/src/compiler/expr/control.rs deleted file mode 100644 index 9362ec9..0000000 --- a/backend/src/compiler/expr/control.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::ast::Expr; - -impl Compiler { - // ternary: cond ? 
then : else - pub fn compile_if_expr( - &mut self, - cond: &Expr, - then_: &Expr, - else_: &Expr, - dest: u8, - ) -> Result<()> { - let tmp = self.alloc_register()?; - self.compile_expr(cond, tmp)?; - let jmp_else = self.emit_jump_if(OpCode::JumpIfNot, tmp, cond.span); - self.free_register(tmp); - - self.compile_expr(then_, dest)?; - let jmp_end = self.emit_jump(OpCode::Jump, then_.span); - self.patch_jump(jmp_else); - - self.compile_expr(else_, dest)?; - self.patch_jump(jmp_end); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/fmt_string.rs b/backend/src/compiler/expr/fmt_string.rs deleted file mode 100644 index 0c7c431..0000000 --- a/backend/src/compiler/expr/fmt_string.rs +++ /dev/null @@ -1,146 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, FmtStringPart}; - -impl Compiler { - /// Compile a format string into concatenation of parts. - /// `extra_args` are used to fill in Placeholder slots. 
- pub fn compile_fmt_string( - &mut self, - parts: &[FmtStringPart], - extra_args: &[Expr], - dest: u8, - span: Span, - ) -> Result<()> { - let placeholder_count = parts - .iter() - .filter(|p| matches!(p, FmtStringPart::Placeholder)) - .count(); - - if placeholder_count != extra_args.len() { - return Err(CompileError::new( - CompileErrorKind::TypeInferenceError(format!( - "format string has {} placeholder(s) but {} argument(s) provided", - placeholder_count, - extra_args.len() - )), - span, - self.source.clone(), - ) - .into()); - } - - if parts.is_empty() { - return self.compile_literal_string("", dest, span); - } - - if parts.len() == 1 && extra_args.is_empty() { - return self.compile_single_fmt_part(&parts[0], dest, span); - } - - // compile each part and concat them - let mut arg_idx = 0; - let mut result_reg: Option = None; - - for part in parts { - let part_reg = self.alloc_register()?; - - match part { - FmtStringPart::Literal(s) => { - self.compile_literal_string(s, part_reg, span)?; - } - FmtStringPart::Expr(expr) => { - self.compile_expr_to_string(expr, part_reg, span)?; - } - FmtStringPart::Placeholder => { - let arg = &extra_args[arg_idx]; - arg_idx += 1; - self.compile_expr_to_string(arg, part_reg, span)?; - } - } - - match result_reg { - None => { - result_reg = Some(part_reg); - } - Some(acc) => { - self.emit_a(OpCode::Add, acc, acc, part_reg, span); - self.free_register(part_reg); - } - } - } - - if let Some(acc) = result_reg - && acc != dest - { - self.emit_a(OpCode::Move, dest, acc, 0, span); - self.free_register(acc); - } - - Ok(()) - } - - fn compile_single_fmt_part( - &mut self, - part: &FmtStringPart, - dest: u8, - span: Span, - ) -> Result<()> { - match part { - FmtStringPart::Literal(s) => self.compile_literal_string(s, dest, span), - FmtStringPart::Expr(expr) => self.compile_expr_to_string(expr, dest, span), - FmtStringPart::Placeholder => Err(CompileError::new( - CompileErrorKind::TypeInferenceError("placeholder without 
argument".to_string()), - span, - self.source.clone(), - ) - .into()), - } - } - - fn compile_expr_to_string(&mut self, expr: &Expr, dest: u8, span: Span) -> Result<()> { - // CallGlobalNative reads args from dest+1, so compile the expression there. - let arg_reg = dest + 1; - - if (arg_reg as usize) < self.register_pool.len() && !self.register_pool[arg_reg as usize] { - // fast path: dest+1 is free - self.register_pool[arg_reg as usize] = true; - self.next_register = self.next_register.max(arg_reg + 1); - - self.compile_expr(expr, arg_reg)?; - self.emit_tostring_call(dest, span)?; - - self.register_pool[arg_reg as usize] = false; - } else { - // slow path: dest+1 is occupied, use a fresh consecutive pair - let call_base = self.alloc_consecutive_registers_for_call(2, span)?; - let call_arg = call_base + 1; - - self.register_pool[call_base as usize] = true; - self.register_pool[call_arg as usize] = true; - self.next_register = self.next_register.max(call_arg + 1); - - self.compile_expr(expr, call_arg)?; - self.emit_tostring_call(call_base, span)?; - - if call_base != dest { - self.emit_a(OpCode::Move, dest, call_base, 0, span); - } - - self.register_pool[call_arg as usize] = false; - self.register_pool[call_base as usize] = false; - } - - Ok(()) - } - - fn emit_tostring_call(&mut self, reg: u8, span: Span) -> Result<()> { - let global_idx = self.get_or_create_global_index("__tostring"); - self.accessed_globals.insert("__tostring".to_string()); - self.emit_call_global_cached(reg, global_idx as u8, 1, "__tostring", span); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/identifier.rs b/backend/src/compiler/expr/identifier.rs deleted file mode 100644 index cf86a63..0000000 --- a/backend/src/compiler/expr/identifier.rs +++ /dev/null @@ -1,70 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind}; - -impl 
Compiler { - // Variable resolution order: local -> upvalue -> global. - // Note: we used to emit GetGlobal (name-based lookup) for globals, - // now we use GetGlobalIdx (index-based) for better performance - pub fn compile_identifier(&mut self, name: &str, dest: u8, span: Span) -> Result<()> { - // Local variable - just move from its register (or skip if already in dest) - if let Some((reg, _mutable)) = self.resolve_variable(name) { - if reg != dest { - self.emit_a(OpCode::Move, dest, reg, 0, span); - } - Ok(()) - } else if let Some((upvalue_idx, _mutable)) = self.resolve_upvalue(name) { - self.emit_a(OpCode::GetUpval, dest, upvalue_idx, 0, span); - Ok(()) - } else { - // For direct imports, use the qualified name in global_layout - // so bytecode loading can detect which module to load - let actual_name = - if self.known_globals.contains(name) && !self.globals.contains_key(name) { - self.resolve_global_name(name).to_string() - } else { - name.to_string() - }; - - let idx = if let Some(&idx) = self.global_indices.get(&actual_name) { - idx - } else if self.globals.contains_key(name) - || Self::is_builtin(name) - || self.known_globals.contains(name) - { - let idx = self.next_global_index; - self.global_indices.insert(actual_name.clone(), idx); - self.next_global_index += 1; - idx - } else { - let hint = self.generate_undefined_variable_hint(name); - let error_msg = if let Some(hint_msg) = hint { - format!("{}\n\nhint: {}", name, hint_msg) - } else { - name.to_string() - }; - return Err(CompileError::new( - CompileErrorKind::UndefinedVariable(error_msg), - span, - self.source.clone(), - ) - .into()); - }; - self.accessed_globals.insert(actual_name); - self.emit_b(OpCode::GetGlobalIdx, dest, idx as i16, span); - Ok(()) - } - } - - pub fn get_local_register(&self, expr: &Expr) -> Option { - if let ExprKind::Identifier(name) = &expr.kind - && let Some((reg, _)) = self.resolve_variable(name) - { - return Some(reg); - } - None - } -} diff --git 
a/backend/src/compiler/expr/identifier_helpers.rs b/backend/src/compiler/expr/identifier_helpers.rs deleted file mode 100644 index c69e4de..0000000 --- a/backend/src/compiler/expr/identifier_helpers.rs +++ /dev/null @@ -1,127 +0,0 @@ -use super::Compiler; - -impl Compiler { - // "did you mean" suggestions for typos - pub(super) fn generate_undefined_variable_hint(&self, var_name: &str) -> Option { - let mut found_modules = Vec::new(); - - for global_name in self.globals.keys() { - if let Some((module, func)) = global_name.split_once("::") - && func == var_name - && !found_modules.contains(&module) - { - found_modules.push(module); - } - } - - if !found_modules.is_empty() { - let best_module = found_modules - .iter() - .find(|m| m.starts_with("std.")) - .or_else(|| found_modules.first())?; - - let display_module = if !best_module.contains('.') && !best_module.contains('/') { - format!("std.{}", best_module) - } else { - best_module.to_string() - }; - - return Some(format!( - "'{}' is available in {}, try: needs {} from {}", - var_name, display_module, var_name, display_module - )); - } - - let similar_locals = self.find_similar_locals(var_name); - if !similar_locals.is_empty() { - return Some(format!("did you mean '{}'?", similar_locals.join("' or '"))); - } - - let similar_globals = self.find_similar_globals(var_name); - if !similar_globals.is_empty() { - return Some(format!( - "did you mean '{}'?", - similar_globals.join("' or '") - )); - } - - None - } - - fn find_similar_locals(&self, var_name: &str) -> Vec { - let mut similar = Vec::new(); - - for local in &self.locals { - if Self::is_similar(var_name, &local.name) { - similar.push(local.name.clone()); - } - } - - similar.truncate(3); - similar - } - - fn find_similar_globals(&self, var_name: &str) -> Vec { - let mut similar = Vec::new(); - - for global_name in self.globals.keys() { - if !global_name.contains("::") && Self::is_similar(var_name, global_name) { - similar.push(global_name.clone()); - } - } - - 
similar.truncate(3); - similar - } - - fn is_similar(a: &str, b: &str) -> bool { - if a == b { - return false; - } - - if a.len().abs_diff(b.len()) > 2 { - return false; - } - - Self::levenshtein_distance(a, b) <= 2 - } - - fn levenshtein_distance(a: &str, b: &str) -> usize { - let a_chars: Vec = a.chars().collect(); - let b_chars: Vec = b.chars().collect(); - let a_len = a_chars.len(); - let b_len = b_chars.len(); - - if a_len == 0 { - return b_len; - } - if b_len == 0 { - return a_len; - } - - let mut matrix = vec![vec![0; b_len + 1]; a_len + 1]; - - for (i, row) in matrix.iter_mut().enumerate().take(a_len + 1) { - row[0] = i; - } - for (j, val) in matrix[0].iter_mut().enumerate().take(b_len + 1) { - *val = j; - } - - for i in 1..=a_len { - for j in 1..=b_len { - let cost = if a_chars[i - 1] == b_chars[j - 1] { - 0 - } else { - 1 - }; - matrix[i][j] = std::cmp::min( - std::cmp::min(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1), - matrix[i - 1][j - 1] + cost, - ); - } - } - - matrix[a_len][b_len] - } -} diff --git a/backend/src/compiler/expr/literal.rs b/backend/src/compiler/expr/literal.rs deleted file mode 100644 index 60d6388..0000000 --- a/backend/src/compiler/expr/literal.rs +++ /dev/null @@ -1,59 +0,0 @@ -use super::Compiler; -use aelys_bytecode::{OpCode, Value}; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -// fits in 16-bit immediate? 
-pub(super) fn small_int_immediate(n: i64) -> Option { - (n >= i16::MIN as i64 && n <= i16::MAX as i64).then_some(n as i16) -} - -impl Compiler { - pub fn compile_literal_int(&mut self, n: i64, dest: u8, span: Span) -> Result<()> { - match small_int_immediate(n) { - Some(imm) => { - self.emit_b(OpCode::LoadI, dest, imm, span); - } - None => { - let val = Value::int_checked(n).map_err(|_| { - CompileError::new( - CompileErrorKind::IntegerOverflow { - value: n.to_string(), - min: Value::INT_MIN, - max: Value::INT_MAX, - }, - span, - self.source.clone(), - ) - })?; - let k = self.add_constant(val, span)?; - self.emit_b(OpCode::LoadK, dest, k as i16, span); - } - } - Ok(()) - } - - pub fn compile_literal_float(&mut self, f: f64, dest: u8, span: Span) -> Result<()> { - let k = self.add_constant(Value::float(f), span)?; - self.emit_b(OpCode::LoadK, dest, k as i16, span); - Ok(()) - } - - pub fn compile_literal_string(&mut self, s: &str, dest: u8, span: Span) -> Result<()> { - let ref_ = self.heap.intern_string(s); - let k = self.add_constant(Value::ptr(ref_.index()), span)?; - self.emit_b(OpCode::LoadK, dest, k as i16, span); - Ok(()) - } - - pub fn compile_literal_bool(&mut self, b: bool, dest: u8, span: Span) -> Result<()> { - self.emit_a(OpCode::LoadBool, dest, if b { 1 } else { 0 }, 0, span); - Ok(()) - } - - pub fn compile_literal_null(&mut self, dest: u8, span: Span) -> Result<()> { - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/logic.rs b/backend/src/compiler/expr/logic.rs deleted file mode 100644 index f894fd3..0000000 --- a/backend/src/compiler/expr/logic.rs +++ /dev/null @@ -1,25 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::Expr; - -impl Compiler { - // Short-circuit: if left is false, skip right - pub fn compile_and(&mut self, left: &Expr, right: &Expr, dest: u8, span: Span) -> Result<()> { - 
self.compile_expr(left, dest)?; - let jump = self.emit_jump_if(OpCode::JumpIfNot, dest, span); - self.compile_expr(right, dest)?; - self.patch_jump(jump); - Ok(()) - } - - // Short-circuit: if left is true, skip right - pub fn compile_or(&mut self, left: &Expr, right: &Expr, dest: u8, span: Span) -> Result<()> { - self.compile_expr(left, dest)?; - let jump = self.emit_jump_if(OpCode::JumpIf, dest, span); - self.compile_expr(right, dest)?; - self.patch_jump(jump); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/mod.rs b/backend/src/compiler/expr/mod.rs deleted file mode 100644 index 044e57c..0000000 --- a/backend/src/compiler/expr/mod.rs +++ /dev/null @@ -1,84 +0,0 @@ -mod array; -mod binary; -mod control; -mod fmt_string; -mod identifier; -mod identifier_helpers; -mod literal; -mod logic; -mod typed; -mod unary; - -use super::Compiler; -use aelys_common::Result; -use aelys_syntax::ast::{Expr, ExprKind}; - -impl Compiler { - pub fn compile_expr(&mut self, expr: &Expr, dest: u8) -> Result<()> { - match &expr.kind { - ExprKind::Int(n) => self.compile_literal_int(*n, dest, expr.span), - ExprKind::Float(f) => self.compile_literal_float(*f, dest, expr.span), - ExprKind::String(s) => self.compile_literal_string(s, dest, expr.span), - ExprKind::FmtString(parts) => self.compile_fmt_string(parts, &[], dest, expr.span), - ExprKind::Bool(b) => self.compile_literal_bool(*b, dest, expr.span), - ExprKind::Null => self.compile_literal_null(dest, expr.span), - ExprKind::Identifier(name) => self.compile_identifier(name, dest, expr.span), - ExprKind::Binary { left, op, right } => { - self.compile_binary(left, *op, right, dest, expr.span) - } - ExprKind::Unary { op, operand } => self.compile_unary(*op, operand, dest, expr.span), - ExprKind::And { left, right } => self.compile_and(left, right, dest, expr.span), - ExprKind::Or { left, right } => self.compile_or(left, right, dest, expr.span), - ExprKind::Call { callee, args } => self.compile_call(callee, args, dest, expr.span), - 
ExprKind::Assign { name, value } => self.compile_assign(name, value, dest, expr.span), - ExprKind::Grouping(inner) => self.compile_expr(inner, dest), - ExprKind::If { - condition, - then_branch, - else_branch, - } => self.compile_if_expr(condition, then_branch, else_branch, dest), - ExprKind::Lambda { - params, - return_type: _, - body, - } => self.compile_lambda(params, body, dest, expr.span), - ExprKind::Member { object, member } => { - self.compile_member_access(object, member, dest, expr.span) - } - ExprKind::ArrayLiteral { elements, .. } => { - self.compile_array_literal(elements, dest, expr.span) - } - ExprKind::ArraySized { element_type, size } => { - self.compile_array_sized(element_type, size, dest, expr.span) - } - ExprKind::VecLiteral { elements, .. } => { - self.compile_vec_literal(elements, dest, expr.span) - } - ExprKind::Index { object, index } => { - self.compile_index_access(object, index, dest, expr.span) - } - ExprKind::IndexAssign { - object, - index, - value, - } => self.compile_index_assign(object, index, value, dest, expr.span), - ExprKind::Range { - start, - end, - inclusive, - } => self.compile_range(start, end, *inclusive, dest, expr.span), - ExprKind::Slice { object, range } => self.compile_slice(object, range, dest, expr.span), - ExprKind::StructLiteral { .. } => Err(aelys_common::error::AelysError::Compile( - aelys_common::error::CompileError::new( - aelys_common::error::CompileErrorKind::TypeInferenceError( - "structs are not supported in the VM backend".to_string(), - ), - expr.span, - self.source.clone(), - ), - )), - // cast: sized types collapse in VM backend - ExprKind::Cast { expr: inner, .. 
} => self.compile_expr(inner, dest), - } - } -} diff --git a/backend/src/compiler/expr/typed/array.rs b/backend/src/compiler/expr/typed/array.rs deleted file mode 100644 index 7a4479a..0000000 --- a/backend/src/compiler/expr/typed/array.rs +++ /dev/null @@ -1,368 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_sema::{InferType, ResolvedType, TypedExpr}; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_array_sized( - &mut self, - element_type: &Option, - size: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - // Compile size expression - let size_reg = self.alloc_register()?; - self.compile_typed_expr(size, size_reg)?; - - // Select opcode based on element type - let opcode = match element_type { - Some(t) if t.is_integer() => OpCode::ArrayNewI, - Some(t) if t.is_float() => OpCode::ArrayNewF, - Some(ResolvedType::Bool) => OpCode::ArrayNewB, - _ => OpCode::ArrayNewP, - }; - - self.emit_a(opcode, dest, size_reg, 0, span); - self.free_register(size_reg); - Ok(()) - } - - pub(super) fn compile_typed_array_literal( - &mut self, - expr_ty: &InferType, - elements: &[TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let count = elements.len(); - - if count == 0 { - let opcode = if let InferType::Array(inner) = expr_ty { - Self::select_typed_opcode( - inner, - OpCode::ArrayNewI, - OpCode::ArrayNewF, - OpCode::ArrayNewB, - OpCode::ArrayNewP, - ) - } else { - OpCode::ArrayNewP - }; - self.emit_a(opcode, dest, 0, 0, span); - return Ok(()); - } - - let start_reg = self.alloc_consecutive_registers_for_call(count as u8, span)?; - - for i in 0..count { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - for (i, elem) in elements.iter().enumerate() { - let elem_reg = start_reg + i as u8; - self.compile_typed_expr(elem, elem_reg)?; - } - - self.emit_a(OpCode::ArrayLit, dest, start_reg, 
count as u8, span); - - for i in (0..count).rev() { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - pub(super) fn compile_typed_vec_literal( - &mut self, - expr_ty: &InferType, - elements: &[TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let count = elements.len(); - - if count == 0 { - let opcode = if let InferType::Vec(inner) = expr_ty { - Self::select_typed_opcode( - inner, - OpCode::VecNewI, - OpCode::VecNewF, - OpCode::VecNewB, - OpCode::VecNewP, - ) - } else { - OpCode::VecNewP - }; - self.emit_a(opcode, dest, 0, 0, span); - return Ok(()); - } - - let start_reg = self.alloc_consecutive_registers_for_call(count as u8, span)?; - - for i in 0..count { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - for (i, elem) in elements.iter().enumerate() { - let elem_reg = start_reg + i as u8; - self.compile_typed_expr(elem, elem_reg)?; - } - - self.emit_a(OpCode::VecLit, dest, start_reg, count as u8, span); - - for i in (0..count).rev() { - let reg = start_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - pub(super) fn compile_typed_index_access( - &mut self, - object: &TypedExpr, - index: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - - let idx_reg = self.alloc_register()?; - self.compile_typed_expr(index, idx_reg)?; - - let opcode = match &object.ty { - InferType::Vec(inner) => Self::select_typed_opcode( - inner, - OpCode::VecLoadI, - OpCode::VecLoadF, - OpCode::VecLoadB, - OpCode::VecLoadP, - ), - InferType::Array(inner) => Self::select_typed_opcode( - inner, - OpCode::ArrayLoadI, - OpCode::ArrayLoadF, - OpCode::ArrayLoadB, - OpCode::ArrayLoadP, - ), - InferType::String => OpCode::StringLoadChar, - _ => OpCode::VecLoadP, - }; - - self.emit_a(opcode, dest, obj_reg, idx_reg, 
span); - - self.free_register(idx_reg); - self.free_register(obj_reg); - - Ok(()) - } - - pub(super) fn compile_typed_index_assign( - &mut self, - object: &TypedExpr, - index: &TypedExpr, - value: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - - let idx_reg = self.alloc_register()?; - self.compile_typed_expr(index, idx_reg)?; - - let val_reg = self.alloc_register()?; - self.compile_typed_expr(value, val_reg)?; - - let opcode = match &object.ty { - InferType::Vec(inner) => Self::select_typed_opcode( - inner, - OpCode::VecStoreI, - OpCode::VecStoreF, - OpCode::VecStoreB, - OpCode::VecStoreP, - ), - InferType::Array(inner) => Self::select_typed_opcode( - inner, - OpCode::ArrayStoreI, - OpCode::ArrayStoreF, - OpCode::ArrayStoreB, - OpCode::ArrayStoreP, - ), - _ => OpCode::VecStoreP, - }; - - self.emit_a(opcode, obj_reg, idx_reg, val_reg, span); - - if dest != val_reg { - self.emit_a(OpCode::Move, dest, val_reg, 0, span); - } - - self.free_register(val_reg); - self.free_register(idx_reg); - self.free_register(obj_reg); - - Ok(()) - } - - pub(super) fn compile_typed_slice( - &mut self, - _object: &TypedExpr, - _range: &TypedExpr, - _dest: u8, - _span: Span, - ) -> Result<()> { - todo!("slice") - } - - pub(super) fn compile_typed_range( - &mut self, - _start: &Option>, - _end: &Option>, - _inclusive: bool, - _dest: u8, - _span: Span, - ) -> Result<()> { - todo!("range") - } - - fn select_typed_opcode( - inner: &InferType, - i: OpCode, - f: OpCode, - b: OpCode, - p: OpCode, - ) -> OpCode { - match inner { - t if t.is_integer() => i, - t if t.is_float() => f, - InferType::Bool => b, - _ => p, - } - } - - pub(super) fn compile_array_len( - &mut self, - object: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - self.emit_a(OpCode::ArrayLen, dest, obj_reg, 0, span); - 
self.free_register(obj_reg); - Ok(()) - } - - pub(super) fn compile_vec_len( - &mut self, - object: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - self.emit_a(OpCode::VecLen, dest, obj_reg, 0, span); - self.free_register(obj_reg); - Ok(()) - } - - pub(super) fn compile_vec_push( - &mut self, - object: &TypedExpr, - inner: &InferType, - value: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - - let val_reg = self.alloc_register()?; - self.compile_typed_expr(value, val_reg)?; - - let opcode = Self::select_typed_opcode( - inner, - OpCode::VecPushI, - OpCode::VecPushF, - OpCode::VecPushB, - OpCode::VecPushP, - ); - - self.emit_a(opcode, obj_reg, val_reg, 0, span); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - - self.free_register(val_reg); - self.free_register(obj_reg); - Ok(()) - } - - pub(super) fn compile_vec_pop( - &mut self, - object: &TypedExpr, - inner: &InferType, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - - let opcode = Self::select_typed_opcode( - inner, - OpCode::VecPopI, - OpCode::VecPopF, - OpCode::VecPopB, - OpCode::VecPopP, - ); - - self.emit_a(opcode, dest, obj_reg, 0, span); - self.free_register(obj_reg); - Ok(()) - } - - pub(super) fn compile_vec_capacity( - &mut self, - object: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - self.emit_a(OpCode::VecCap, dest, obj_reg, 0, span); - self.free_register(obj_reg); - Ok(()) - } - - pub(super) fn compile_vec_reserve( - &mut self, - object: &TypedExpr, - capacity: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let obj_reg = self.alloc_register()?; - self.compile_typed_expr(object, obj_reg)?; - - let cap_reg = 
self.alloc_register()?; - self.compile_typed_expr(capacity, cap_reg)?; - - self.emit_a(OpCode::VecReserve, obj_reg, cap_reg, 0, span); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - - self.free_register(cap_reg); - self.free_register(obj_reg); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/assign.rs b/backend/src/compiler/expr/typed/assign.rs deleted file mode 100644 index afda8e5..0000000 --- a/backend/src/compiler/expr/typed/assign.rs +++ /dev/null @@ -1,58 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_assign( - &mut self, - name: &str, - value: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - self.compile_typed_expr(value, dest)?; - - if let Some((reg, mutable)) = self.resolve_variable(name) { - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - if reg != dest { - self.emit_a(OpCode::Move, reg, dest, 0, span); - } - } else if let Some((upval_idx, mutable)) = self.resolve_upvalue(name) { - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - self.emit_a(OpCode::SetUpval, upval_idx, dest, 0, span); - } else { - if let Some(&mutable) = self.globals.get(name) - && !mutable - { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - span, - self.source.clone(), - ) - .into()); - } - // For assignments to user-defined globals, use raw index without translation - let idx = self.get_or_create_global_index_raw(name); - self.accessed_globals.insert(name.to_string()); - self.emit_b(OpCode::SetGlobalIdx, dest, idx as i16, span); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/binary.rs 
b/backend/src/compiler/expr/typed/binary.rs deleted file mode 100644 index f83b628..0000000 --- a/backend/src/compiler/expr/typed/binary.rs +++ /dev/null @@ -1,78 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::BinaryOp; - -impl Compiler { - fn get_typed_local_register(&self, expr: &aelys_sema::TypedExpr) -> Option { - if let aelys_sema::TypedExprKind::Identifier(name) = &expr.kind - && let Some((reg, _)) = self.resolve_variable(name) - { - return Some(reg); - } - None - } - - pub(super) fn compile_typed_binary( - &mut self, - left: &aelys_sema::TypedExpr, - op: BinaryOp, - right: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let left_resolved = aelys_sema::ResolvedType::from_infer_type(&left.ty); - let right_resolved = aelys_sema::ResolvedType::from_infer_type(&right.ty); - let opcode = crate::opcode_select::select_opcode(op, &left_resolved, &right_resolved); - - if let Some(left_local_reg) = self.get_typed_local_register(left) { - let (right_reg, right_needs_free) = - if let Some(r) = self.get_typed_local_register(right) { - (r, false) - } else { - let temp = self.alloc_register()?; - self.compile_typed_expr(right, temp)?; - (temp, true) - }; - - self.emit_a(opcode, dest, left_local_reg, right_reg, span); - - if right_needs_free { - self.free_register(right_reg); - } - return Ok(()); - } - - if let Some(right_local_reg) = self.get_typed_local_register(right) { - self.compile_typed_expr(left, dest)?; - self.emit_a(opcode, dest, dest, right_local_reg, span); - return Ok(()); - } - - let right_has_side_effects = Self::typed_expr_may_have_side_effects(right); - - if right_has_side_effects { - let left_reg = self.alloc_register()?; - self.compile_typed_expr(left, left_reg)?; - - let right_reg = self.alloc_register()?; - self.compile_typed_expr(right, right_reg)?; - - self.emit_a(opcode, dest, left_reg, right_reg, span); - - self.free_register(right_reg); - 
self.free_register(left_reg); - } else { - self.compile_typed_expr(left, dest)?; - - let right_reg = self.alloc_register()?; - self.compile_typed_expr(right, right_reg)?; - - self.emit_a(opcode, dest, dest, right_reg, span); - - self.free_register(right_reg); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/call.rs b/backend/src/compiler/expr/typed/call.rs deleted file mode 100644 index ca10de7..0000000 --- a/backend/src/compiler/expr/typed/call.rs +++ /dev/null @@ -1,576 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_sema::{InferType, TypedFmtStringPart}; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_call( - &mut self, - callee: &aelys_sema::TypedExpr, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - use aelys_sema::TypedExprKind; - - // Handle format string with placeholders: func("x={}", x) -> func("x=" + __tostring(x)) - if let Some((fmt_parts, placeholder_count)) = Self::get_typed_fmt_placeholders(args) - && placeholder_count > 0 - { - return self.compile_typed_call_with_fmt_placeholders( - callee, - args, - fmt_parts, - placeholder_count, - dest, - span, - ); - } - - // Check for Array/Vec method calls first - if let TypedExprKind::Member { object, member } = &callee.kind { - // Handle Array methods - if let InferType::Array(_) = &object.ty - && member == "len" - && args.is_empty() - { - return self.compile_array_len(object, dest, span); - } - - // Handle Vec methods - if let InferType::Vec(inner) = &object.ty { - match member.as_str() { - "len" if args.is_empty() => { - return self.compile_vec_len(object, dest, span); - } - "push" if args.len() == 1 => { - return self.compile_vec_push(object, inner, &args[0], dest, span); - } - "pop" if args.is_empty() => { - return self.compile_vec_pop(object, inner, dest, span); - } - "capacity" if args.is_empty() => { - return 
self.compile_vec_capacity(object, dest, span); - } - "reserve" if args.len() == 1 => { - return self.compile_vec_reserve(object, &args[0], dest, span); - } - _ => {} - } - } - - // Handle String methods: s.method(args) → string::method(s, args...) - if matches!(&object.ty, InferType::String) - && let Some(expected_args) = Self::string_method_arity(member) - && args.len() == expected_args - { - return self.compile_string_method_call(object, member, args, dest, span); - } - - // Handle to_string() on any type - if member == "to_string" && args.is_empty() { - return self.compile_tostring_method(object, dest, span); - } - - // Module alias calls must be checked before Dynamic dispatch, - // otherwise methods like "join" get intercepted as string methods - if let TypedExprKind::Identifier(module_name) = &object.kind - && self.module_aliases.contains(module_name) - { - let qualified_name = format!("{}::{}", module_name, member); - let global_idx = self.get_or_create_global_index(&qualified_name); - self.accessed_globals.insert(qualified_name.clone()); - - if global_idx <= 255 { - let arg_start = match dest.checked_add(1) { - Some(s) => s, - None => { - return self.compile_typed_call_fallback(callee, args, dest, span); - } - }; - - let mut can_use_callglobal = true; - for i in 0..args.len() { - let arg_reg = match arg_start.checked_add(i as u8) { - Some(r) => r, - None => { - can_use_callglobal = false; - break; - } - }; - if (arg_reg as usize) >= self.register_pool.len() - || self.register_pool[arg_reg as usize] - { - can_use_callglobal = false; - break; - } - } - - if can_use_callglobal { - for i in 0..args.len() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = true; - if arg_reg >= self.next_register { - self.next_register = arg_reg + 1; - } - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached( - dest, - global_idx as u8, - 
args.len() as u8, - &qualified_name, - span, - ); - - for i in (0..args.len()).rev() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = false; - } - - return Ok(()); - } - } - } - - // Handle Vec/Array/String methods on Dynamic-typed objects (runtime dispatch) - // Vec/collection methods first — len uses polymorphic VecLen opcode - if matches!(&object.ty, InferType::Dynamic | InferType::Var(_)) { - match member.as_str() { - "len" if args.is_empty() => { - return self.compile_vec_len(object, dest, span); - } - "push" if args.len() == 1 => { - return self.compile_vec_push( - object, - &InferType::Dynamic, - &args[0], - dest, - span, - ); - } - "pop" if args.is_empty() => { - return self.compile_vec_pop(object, &InferType::Dynamic, dest, span); - } - "capacity" if args.is_empty() => { - return self.compile_vec_capacity(object, dest, span); - } - "reserve" if args.len() == 1 => { - return self.compile_vec_reserve(object, &args[0], dest, span); - } - _ => {} - } - - // String methods on dynamic types (excludes len, handled above) - if let Some(expected_args) = Self::string_method_arity(member) - && args.len() == expected_args - { - return self.compile_string_method_call(object, member, args, dest, span); - } - } - } - - if let TypedExprKind::Identifier(name) = &callee.kind { - if Self::is_builtin(name) { - return self.compile_typed_builtin_call(name, args, dest, span); - } - - if self.resolve_variable(name).is_none() && self.resolve_upvalue(name).is_none() { - if !self.globals.contains_key(name) && !self.known_globals.contains(name) { - return self.compile_typed_call_fallback(callee, args, dest, span); - } - let actual_name = self.resolve_global_name(name).to_string(); - let global_idx = self.get_or_create_global_index(name); - self.accessed_globals.insert(actual_name.clone()); - - if global_idx <= 255 { - let arg_start = match dest.checked_add(1) { - Some(s) => s, - None => return self.compile_typed_call_fallback(callee, args, dest, span), 
- }; - - let mut can_use_callglobal = true; - for i in 0..args.len() { - let arg_reg = match arg_start.checked_add(i as u8) { - Some(r) => r, - None => { - can_use_callglobal = false; - break; - } - }; - if (arg_reg as usize) >= self.register_pool.len() - || self.register_pool[arg_reg as usize] - { - can_use_callglobal = false; - break; - } - } - - if can_use_callglobal { - for i in 0..args.len() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = true; - if arg_reg >= self.next_register { - self.next_register = arg_reg + 1; - } - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached( - dest, - global_idx as u8, - args.len() as u8, - name, - span, - ); - - for i in (0..args.len()).rev() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = false; - } - - return Ok(()); - } - } - } - } - - self.compile_typed_call_fallback(callee, args, dest, span) - } - - pub(super) fn compile_typed_call_fallback( - &mut self, - callee: &aelys_sema::TypedExpr, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let nargs = args.len(); - let callee_reg = self.alloc_consecutive_registers_for_call(nargs as u8 + 1, span)?; - - for i in 0..=nargs { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - self.compile_typed_expr(callee, callee_reg)?; - - for (i, arg) in args.iter().enumerate() { - let arg_reg = callee_reg + 1 + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_a(OpCode::Call, dest, callee_reg, args.len() as u8, span); - - for i in (0..=nargs).rev() { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - /// returns the number of extra args (excluding self) expected by a string method, or None if not a valid method. 
- fn string_method_arity(method: &str) -> Option { - match method { - // 0-arg methods (only self) - "len" | "char_len" | "chars" | "bytes" | "to_upper" | "to_lower" | "capitalize" - | "trim" | "trim_start" | "trim_end" | "is_empty" | "is_whitespace" | "is_numeric" - | "is_alphabetic" | "is_alphanumeric" | "reverse" | "lines" | "line_count" => Some(0), - // 1-arg methods (self + 1 arg) - "char_at" | "byte_at" | "contains" | "starts_with" | "ends_with" | "find" | "rfind" - | "count" | "split" | "repeat" | "concat" => Some(1), - // 2-arg methods (self + 2 args) - "substr" | "replace" | "replace_first" | "pad_left" | "pad_right" => Some(2), - // join takes self + 1 arg (separator) - "join" => Some(1), - _ => None, - } - } - - /// compile s.method(args) as string::method(s, args...) - fn compile_string_method_call( - &mut self, - object: &aelys_sema::TypedExpr, - method: &str, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let qualified_name = format!("string::{}", method); - let total_args = 1 + args.len(); // self + extra args - - let global_idx = self.get_or_create_global_index(&qualified_name); - self.accessed_globals.insert(qualified_name.clone()); - - if global_idx <= 255 { - // Try to use CallGlobalCached: args go in dest+1, dest+2, ... 
- let arg_start = match dest.checked_add(1) { - Some(s) => s, - None => { - return self.compile_string_method_call_fallback( - object, - &qualified_name, - args, - dest, - span, - ); - } - }; - - let mut can_use_callglobal = true; - for i in 0..total_args { - let arg_reg = match arg_start.checked_add(i as u8) { - Some(r) => r, - None => { - can_use_callglobal = false; - break; - } - }; - if (arg_reg as usize) >= self.register_pool.len() - || self.register_pool[arg_reg as usize] - { - can_use_callglobal = false; - break; - } - } - - if can_use_callglobal { - // Reserve registers - for i in 0..total_args { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = true; - if arg_reg >= self.next_register { - self.next_register = arg_reg + 1; - } - } - - // First arg: the string object itself - self.compile_typed_expr(object, arg_start)?; - - // Remaining args - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + 1 + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached( - dest, - global_idx as u8, - total_args as u8, - &qualified_name, - span, - ); - - // Free registers - for i in (0..total_args).rev() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = false; - } - - return Ok(()); - } - } - - self.compile_string_method_call_fallback(object, &qualified_name, args, dest, span) - } - - fn compile_string_method_call_fallback( - &mut self, - object: &aelys_sema::TypedExpr, - qualified_name: &str, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let total_args = 1 + args.len(); - let callee_reg = self.alloc_consecutive_registers_for_call(total_args as u8 + 1, span)?; - - for i in 0..=total_args { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - // load the function by global index (avoids known_globals check) - let global_idx = 
self.get_or_create_global_index(qualified_name); - self.accessed_globals.insert(qualified_name.to_string()); - self.emit_b(OpCode::GetGlobalIdx, callee_reg, global_idx as i16, span); - - // first arg: the string object - self.compile_typed_expr(object, callee_reg + 1)?; - - // remaining stuff - for (i, arg) in args.iter().enumerate() { - let arg_reg = callee_reg + 2 + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_a(OpCode::Call, dest, callee_reg, total_args as u8, span); - - for i in (0..=total_args).rev() { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } - - /// compiled obj.to_string() as __tostring(obj) - fn compile_tostring_method( - &mut self, - object: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let qualified_name = "__tostring"; - let global_idx = self.get_or_create_global_index(qualified_name); - self.accessed_globals.insert(qualified_name.to_string()); - - if global_idx <= 255 { - let arg_start = match dest.checked_add(1) { - Some(s) - if (s as usize) < self.register_pool.len() - && !self.register_pool[s as usize] => - { - s - } - _ => { - return self.compile_tostring_method_fallback(object, dest, span); - } - }; - - self.register_pool[arg_start as usize] = true; - if arg_start >= self.next_register { - self.next_register = arg_start + 1; - } - - self.compile_typed_expr(object, arg_start)?; - - self.emit_call_global_cached(dest, global_idx as u8, 1, qualified_name, span); - - self.register_pool[arg_start as usize] = false; - return Ok(()); - } - - self.compile_tostring_method_fallback(object, dest, span) - } - - fn compile_tostring_method_fallback( - &mut self, - object: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let callee_reg = self.alloc_consecutive_registers_for_call(2, span)?; - - self.register_pool[callee_reg as usize] = true; - self.register_pool[(callee_reg + 1) as usize] = true; - if callee_reg + 1 >= self.next_register 
{ - self.next_register = callee_reg + 2; - } - - let global_idx = self.get_or_create_global_index("__tostring"); - self.accessed_globals.insert("__tostring".to_string()); - self.emit_b(OpCode::GetGlobalIdx, callee_reg, global_idx as i16, span); - self.compile_typed_expr(object, callee_reg + 1)?; - - self.emit_a(OpCode::Call, dest, callee_reg, 1, span); - - self.register_pool[(callee_reg + 1) as usize] = false; - self.register_pool[callee_reg as usize] = false; - - Ok(()) - } - - fn get_typed_fmt_placeholders( - args: &[aelys_sema::TypedExpr], - ) -> Option<(&[TypedFmtStringPart], usize)> { - use aelys_sema::TypedExprKind; - if args.is_empty() { - return None; - } - if let TypedExprKind::FmtString(parts) = &args[0].kind { - let count = parts - .iter() - .filter(|p| matches!(p, TypedFmtStringPart::Placeholder)) - .count(); - return Some((parts, count)); - } - None - } - - fn compile_typed_call_with_fmt_placeholders( - &mut self, - callee: &aelys_sema::TypedExpr, - args: &[aelys_sema::TypedExpr], - fmt_parts: &[TypedFmtStringPart], - placeholder_count: usize, - dest: u8, - span: Span, - ) -> Result<()> { - let extra_args_needed = placeholder_count; - let extra_args_available = args.len() - 1; - - if extra_args_available < extra_args_needed { - return Err(CompileError::new( - CompileErrorKind::TypeInferenceError(format!( - "format string has {} placeholder(s) but only {} argument(s) provided", - extra_args_needed, extra_args_available - )), - span, - self.source.clone(), - ) - .into()); - } - - let fmt_extra_args = &args[1..1 + extra_args_needed]; - let remaining_args = &args[1 + extra_args_needed..]; - - let total_args = 1 + remaining_args.len(); - let func_reg = self.alloc_consecutive_registers_for_call(total_args as u8 + 1, span)?; - - for i in 0..=total_args { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = true; - if reg >= self.next_register { - self.next_register = reg + 1; - } - } - - self.compile_typed_expr(callee, func_reg)?; - - let 
fmt_reg = func_reg + 1; - self.compile_typed_fmt_string(fmt_parts, fmt_extra_args, fmt_reg, args[0].span)?; - - for (i, arg) in remaining_args.iter().enumerate() { - let arg_reg = func_reg + 2 + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_a(OpCode::Call, dest, func_reg, total_args as u8, span); - - for i in (0..=total_args).rev() { - let reg = func_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/call_helpers.rs b/backend/src/compiler/expr/typed/call_helpers.rs deleted file mode 100644 index aa00fce..0000000 --- a/backend/src/compiler/expr/typed/call_helpers.rs +++ /dev/null @@ -1,209 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_sema::TypedExprKind; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_builtin_call( - &mut self, - name: &str, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - // fast path: direct opcodes for memory operations - match name { - "alloc" if args.len() == 1 => { - return self.compile_typed_alloc(&args[0], dest, span); - } - "free" if args.len() == 1 => { - return self.compile_typed_free(&args[0], dest, span); - } - "load" if args.len() == 2 => { - return self.compile_typed_load(&args[0], &args[1], dest, span); - } - "store" if args.len() == 3 => { - return self.compile_typed_store(&args[0], &args[1], &args[2], dest, span); - } - _ => {} - } - - // fallback to CallGlobalNative for 'type' and other builtins - let idx = self.get_or_create_global_index(name); - self.accessed_globals.insert(name.to_string()); - - if idx > 255 { - return self.compile_typed_call_generic(name, args, dest, span); - } - - let arg_start = match dest.checked_add(1) { - Some(s) => s, - None => return self.compile_typed_call_generic(name, args, dest, span), - }; - - for i in 0..args.len() { - let arg_reg = match arg_start.checked_add(i as u8) { - Some(r) => 
r, - None => return self.compile_typed_call_generic(name, args, dest, span), - }; - if (arg_reg as usize) >= self.register_pool.len() { - return self.compile_typed_call_generic(name, args, dest, span); - } - if self.register_pool[arg_reg as usize] { - return self.compile_typed_call_generic(name, args, dest, span); - } - } - - for i in 0..args.len() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = true; - if arg_reg >= self.next_register { - self.next_register = arg_reg + 1; - } - } - - for (i, arg) in args.iter().enumerate() { - let arg_reg = arg_start + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_call_global_cached(dest, idx as u8, args.len() as u8, name, span); - - for i in (0..args.len()).rev() { - let arg_reg = arg_start + i as u8; - self.register_pool[arg_reg as usize] = false; - } - - Ok(()) - } - - fn compile_typed_alloc( - &mut self, - size_expr: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let size_reg = self.alloc_register()?; - self.compile_typed_expr(size_expr, size_reg)?; - self.emit_a(OpCode::Alloc, dest, size_reg, 0, span); - self.free_register(size_reg); - Ok(()) - } - - fn compile_typed_free( - &mut self, - ptr_expr: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let ptr_reg = self.alloc_register()?; - self.compile_typed_expr(ptr_expr, ptr_reg)?; - self.emit_a(OpCode::Free, ptr_reg, 0, 0, span); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - Ok(()) - } - - fn compile_typed_load( - &mut self, - ptr_expr: &aelys_sema::TypedExpr, - offset_expr: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - // Optimize: use LoadMemI for constant offsets 0-255 - if let TypedExprKind::Int(offset) = &offset_expr.kind - && *offset >= 0 - && *offset <= 255 - { - let ptr_reg = self.alloc_register()?; - self.compile_typed_expr(ptr_expr, ptr_reg)?; - self.emit_a(OpCode::LoadMemI, dest, ptr_reg, *offset as u8, 
span); - self.free_register(ptr_reg); - return Ok(()); - } - - let ptr_reg = self.alloc_register()?; - let offset_reg = self.alloc_register()?; - self.compile_typed_expr(ptr_expr, ptr_reg)?; - self.compile_typed_expr(offset_expr, offset_reg)?; - self.emit_a(OpCode::LoadMem, dest, ptr_reg, offset_reg, span); - self.free_register(offset_reg); - self.free_register(ptr_reg); - Ok(()) - } - - fn compile_typed_store( - &mut self, - ptr_expr: &aelys_sema::TypedExpr, - offset_expr: &aelys_sema::TypedExpr, - value_expr: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - // Optimize: use StoreMemI for constant offsets 0-255 - if let TypedExprKind::Int(offset) = &offset_expr.kind - && *offset >= 0 - && *offset <= 255 - { - let ptr_reg = self.alloc_register()?; - let val_reg = self.alloc_register()?; - self.compile_typed_expr(ptr_expr, ptr_reg)?; - self.compile_typed_expr(value_expr, val_reg)?; - self.emit_a(OpCode::StoreMemI, ptr_reg, *offset as u8, val_reg, span); - self.free_register(val_reg); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - return Ok(()); - } - - let ptr_reg = self.alloc_register()?; - let offset_reg = self.alloc_register()?; - let val_reg = self.alloc_register()?; - self.compile_typed_expr(ptr_expr, ptr_reg)?; - self.compile_typed_expr(offset_expr, offset_reg)?; - self.compile_typed_expr(value_expr, val_reg)?; - self.emit_a(OpCode::StoreMem, ptr_reg, offset_reg, val_reg, span); - self.free_register(val_reg); - self.free_register(offset_reg); - self.free_register(ptr_reg); - self.emit_a(OpCode::LoadNull, dest, 0, 0, span); - Ok(()) - } - - pub(super) fn compile_typed_call_generic( - &mut self, - name: &str, - args: &[aelys_sema::TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let nargs = args.len(); - let callee_reg = self.alloc_consecutive_registers_for_call(nargs as u8 + 1, span)?; - - for i in 0..=nargs { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = true; - if 
reg >= self.next_register { - self.next_register = reg + 1; - } - } - - self.compile_identifier(name, callee_reg, span)?; - - for (i, arg) in args.iter().enumerate() { - let arg_reg = callee_reg + 1 + i as u8; - self.compile_typed_expr(arg, arg_reg)?; - } - - self.emit_c(OpCode::Call, dest, callee_reg, args.len() as u8, span); - - for i in (0..=nargs).rev() { - let reg = callee_reg + i as u8; - self.register_pool[reg as usize] = false; - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/control.rs b/backend/src/compiler/expr/typed/control.rs deleted file mode 100644 index 8ee1273..0000000 --- a/backend/src/compiler/expr/typed/control.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; - -impl Compiler { - pub(super) fn compile_typed_if_expr( - &mut self, - condition: &aelys_sema::TypedExpr, - then_branch: &aelys_sema::TypedExpr, - else_branch: &aelys_sema::TypedExpr, - dest: u8, - ) -> Result<()> { - let cond_reg = self.alloc_register()?; - self.compile_typed_expr(condition, cond_reg)?; - - let else_jump = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - self.free_register(cond_reg); - - self.compile_typed_expr(then_branch, dest)?; - let end_jump = self.emit_jump(OpCode::Jump, then_branch.span); - - self.patch_jump(else_jump); - self.compile_typed_expr(else_branch, dest)?; - self.patch_jump(end_jump); - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/dispatch.rs b/backend/src/compiler/expr/typed/dispatch.rs deleted file mode 100644 index 38141b2..0000000 --- a/backend/src/compiler/expr/typed/dispatch.rs +++ /dev/null @@ -1,163 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; - -impl Compiler { - pub fn compile_typed_expr(&mut self, expr: &aelys_sema::TypedExpr, dest: u8) -> Result<()> { - use aelys_sema::TypedExprKind; - - match &expr.kind { - TypedExprKind::Int(n) => self.compile_literal_int(*n, dest, expr.span), - TypedExprKind::Float(f) 
=> self.compile_literal_float(*f, dest, expr.span), - TypedExprKind::String(s) => self.compile_literal_string(s, dest, expr.span), - TypedExprKind::FmtString(parts) => { - self.compile_typed_fmt_string(parts, &[], dest, expr.span) - } - TypedExprKind::Bool(b) => self.compile_literal_bool(*b, dest, expr.span), - TypedExprKind::Null => self.compile_literal_null(dest, expr.span), - TypedExprKind::Identifier(name) => self.compile_identifier(name, dest, expr.span), - TypedExprKind::Binary { left, op, right } => { - self.compile_typed_binary(left, *op, right, dest, expr.span) - } - TypedExprKind::Unary { op, operand } => { - self.compile_typed_unary(*op, operand, dest, expr.span) - } - TypedExprKind::And { left, right } => { - self.compile_typed_and(left, right, dest, expr.span) - } - TypedExprKind::Or { left, right } => { - self.compile_typed_or(left, right, dest, expr.span) - } - TypedExprKind::Call { callee, args } => { - self.compile_typed_call(callee, args, dest, expr.span) - } - TypedExprKind::Assign { name, value } => { - self.compile_typed_assign(name, value, dest, expr.span) - } - TypedExprKind::Grouping(inner) => self.compile_typed_expr(inner, dest), - TypedExprKind::If { - condition, - then_branch, - else_branch, - } => self.compile_typed_if_expr(condition, then_branch, else_branch, dest), - TypedExprKind::Lambda(inner) => self.compile_typed_expr(inner, dest), - TypedExprKind::LambdaInner { - params, - return_type: _, - body, - captures, - } => self.compile_typed_lambda_with_stmts(params, body, captures, dest, expr.span), - TypedExprKind::Member { object, member } => { - self.compile_typed_member_access(object, member, dest, expr.span) - } - TypedExprKind::ArrayLiteral { elements, .. } => { - self.compile_typed_array_literal(&expr.ty, elements, dest, expr.span) - } - TypedExprKind::ArraySized { element_type, size } => { - self.compile_typed_array_sized(element_type, size, dest, expr.span) - } - TypedExprKind::VecLiteral { elements, .. 
} => { - self.compile_typed_vec_literal(&expr.ty, elements, dest, expr.span) - } - TypedExprKind::Index { object, index } => { - self.compile_typed_index_access(object, index, dest, expr.span) - } - TypedExprKind::IndexAssign { - object, - index, - value, - } => self.compile_typed_index_assign(object, index, value, dest, expr.span), - TypedExprKind::Range { - start, - end, - inclusive, - } => self.compile_typed_range(start, end, *inclusive, dest, expr.span), - TypedExprKind::Slice { object, range } => { - self.compile_typed_slice(object, range, dest, expr.span) - } - TypedExprKind::StructLiteral { .. } => Err(aelys_common::error::AelysError::Compile( - aelys_common::error::CompileError::new( - aelys_common::error::CompileErrorKind::TypeInferenceError( - "structs are not supported in the VM backend".to_string(), - ), - expr.span, - self.source.clone(), - ), - )), - TypedExprKind::Cast { expr: inner, .. } => { - // stub. - // cast: sized types collapse in VM backend - self.compile_typed_expr(inner, dest) - } - } - } - - pub(super) fn typed_expr_may_have_side_effects(expr: &aelys_sema::TypedExpr) -> bool { - use aelys_sema::TypedExprKind; - - match &expr.kind { - TypedExprKind::Call { .. } => true, - TypedExprKind::Assign { .. } => true, - TypedExprKind::Binary { left, right, .. } => { - Self::typed_expr_may_have_side_effects(left) - || Self::typed_expr_may_have_side_effects(right) - } - TypedExprKind::Unary { operand, .. 
} => Self::typed_expr_may_have_side_effects(operand), - TypedExprKind::And { left, right } | TypedExprKind::Or { left, right } => { - Self::typed_expr_may_have_side_effects(left) - || Self::typed_expr_may_have_side_effects(right) - } - TypedExprKind::If { - condition, - then_branch, - else_branch, - } => { - Self::typed_expr_may_have_side_effects(condition) - || Self::typed_expr_may_have_side_effects(then_branch) - || Self::typed_expr_may_have_side_effects(else_branch) - } - TypedExprKind::Grouping(inner) | TypedExprKind::Lambda(inner) => { - Self::typed_expr_may_have_side_effects(inner) - } - TypedExprKind::Member { object, .. } => Self::typed_expr_may_have_side_effects(object), - TypedExprKind::LambdaInner { .. } => false, - TypedExprKind::ArrayLiteral { elements, .. } - | TypedExprKind::VecLiteral { elements, .. } => { - elements.iter().any(Self::typed_expr_may_have_side_effects) - } - TypedExprKind::ArraySized { size, .. } => Self::typed_expr_may_have_side_effects(size), - TypedExprKind::Index { object, index } => { - Self::typed_expr_may_have_side_effects(object) - || Self::typed_expr_may_have_side_effects(index) - } - TypedExprKind::IndexAssign { .. } => true, // assignment has side effects - TypedExprKind::Range { start, end, .. } => { - start - .as_ref() - .is_some_and(|s| Self::typed_expr_may_have_side_effects(s)) - || end - .as_ref() - .is_some_and(|e| Self::typed_expr_may_have_side_effects(e)) - } - TypedExprKind::Slice { object, range } => { - Self::typed_expr_may_have_side_effects(object) - || Self::typed_expr_may_have_side_effects(range) - } - TypedExprKind::FmtString(parts) => parts.iter().any(|p| match p { - aelys_sema::TypedFmtStringPart::Expr(e) => { - Self::typed_expr_may_have_side_effects(e) - } - _ => false, - }), - TypedExprKind::StructLiteral { fields, .. } => fields - .iter() - .any(|(_, v)| Self::typed_expr_may_have_side_effects(v)), - TypedExprKind::Cast { expr, .. 
} => Self::typed_expr_may_have_side_effects(expr), - TypedExprKind::Int(_) - | TypedExprKind::Float(_) - | TypedExprKind::Bool(_) - | TypedExprKind::String(_) - | TypedExprKind::Null - | TypedExprKind::Identifier(_) => false, - } - } -} diff --git a/backend/src/compiler/expr/typed/fmt_string.rs b/backend/src/compiler/expr/typed/fmt_string.rs deleted file mode 100644 index 6289dcd..0000000 --- a/backend/src/compiler/expr/typed/fmt_string.rs +++ /dev/null @@ -1,148 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_sema::{TypedExpr, TypedFmtStringPart}; -use aelys_syntax::Span; - -impl Compiler { - pub fn compile_typed_fmt_string( - &mut self, - parts: &[TypedFmtStringPart], - extra_args: &[TypedExpr], - dest: u8, - span: Span, - ) -> Result<()> { - let placeholder_count = parts - .iter() - .filter(|p| matches!(p, TypedFmtStringPart::Placeholder)) - .count(); - - if placeholder_count != extra_args.len() { - return Err(CompileError::new( - CompileErrorKind::TypeInferenceError(format!( - "format string has {} placeholder(s) but {} argument(s) provided", - placeholder_count, - extra_args.len() - )), - span, - self.source.clone(), - ) - .into()); - } - - if parts.is_empty() { - return self.compile_literal_string("", dest, span); - } - - if parts.len() == 1 && extra_args.is_empty() { - return self.compile_single_typed_fmt_part(&parts[0], dest, span); - } - - let mut arg_idx = 0; - let mut result_reg: Option = None; - - for part in parts { - let part_reg = self.alloc_register()?; - - match part { - TypedFmtStringPart::Literal(s) => { - self.compile_literal_string(s, part_reg, span)?; - } - TypedFmtStringPart::Expr(expr) => { - self.compile_typed_expr_to_string(expr, part_reg, span)?; - } - TypedFmtStringPart::Placeholder => { - let arg = &extra_args[arg_idx]; - arg_idx += 1; - self.compile_typed_expr_to_string(arg, part_reg, span)?; - } - } - - match result_reg 
{ - None => { - result_reg = Some(part_reg); - } - Some(acc) => { - self.emit_a(OpCode::Add, acc, acc, part_reg, span); - self.free_register(part_reg); - } - } - } - - if let Some(acc) = result_reg - && acc != dest - { - self.emit_a(OpCode::Move, dest, acc, 0, span); - self.free_register(acc); - } - - Ok(()) - } - - fn compile_single_typed_fmt_part( - &mut self, - part: &TypedFmtStringPart, - dest: u8, - span: Span, - ) -> Result<()> { - match part { - TypedFmtStringPart::Literal(s) => self.compile_literal_string(s, dest, span), - TypedFmtStringPart::Expr(expr) => self.compile_typed_expr_to_string(expr, dest, span), - TypedFmtStringPart::Placeholder => Err(CompileError::new( - CompileErrorKind::TypeInferenceError("placeholder without argument".to_string()), - span, - self.source.clone(), - ) - .into()), - } - } - - fn compile_typed_expr_to_string( - &mut self, - expr: &TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - // CallGlobalNative reads args from dest+1, so compile the expression there. 
- let arg_reg = dest + 1; - - if (arg_reg as usize) < self.register_pool.len() && !self.register_pool[arg_reg as usize] { - // fast path: dest+1 is free - self.register_pool[arg_reg as usize] = true; - self.next_register = self.next_register.max(arg_reg + 1); - - self.compile_typed_expr(expr, arg_reg)?; - self.emit_typed_tostring_call(dest, span)?; - - self.register_pool[arg_reg as usize] = false; - } else { - // slow path: dest+1 is occupied, use a fresh consecutive pair - let call_base = self.alloc_consecutive_registers_for_call(2, span)?; - let call_arg = call_base + 1; - - self.register_pool[call_base as usize] = true; - self.register_pool[call_arg as usize] = true; - self.next_register = self.next_register.max(call_arg + 1); - - self.compile_typed_expr(expr, call_arg)?; - self.emit_typed_tostring_call(call_base, span)?; - - if call_base != dest { - self.emit_a(OpCode::Move, dest, call_base, 0, span); - } - - self.register_pool[call_arg as usize] = false; - self.register_pool[call_base as usize] = false; - } - - Ok(()) - } - - fn emit_typed_tostring_call(&mut self, reg: u8, span: Span) -> Result<()> { - let global_idx = self.get_or_create_global_index("__tostring"); - self.accessed_globals.insert("__tostring".to_string()); - self.emit_call_global_cached(reg, global_idx as u8, 1, "__tostring", span); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/lambda.rs b/backend/src/compiler/expr/typed/lambda.rs deleted file mode 100644 index 859d11e..0000000 --- a/backend/src/compiler/expr/typed/lambda.rs +++ /dev/null @@ -1,111 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::{OpCode, Value}; -use aelys_common::Result; -use aelys_syntax::Span; - -impl Compiler { - pub fn compile_typed_lambda( - &mut self, - params: &[aelys_sema::TypedParam], - body: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - self.compile_typed_lambda_impl(params, body, dest, span) - } - - fn compile_typed_lambda_impl( - &mut self, - params: 
&[aelys_sema::TypedParam], - body: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - let mut nested_compiler = super::super::Compiler::for_nested_function( - Some("".to_string()), - self.source.clone(), - self.heap.clone(), - self.globals.clone(), - self.global_indices.clone(), - self.next_global_index, - self.locals.clone(), - self.upvalues.clone(), - self.all_enclosing_locals.clone(), - self.module_aliases.clone(), - self.known_globals.clone(), - self.known_native_globals.clone(), - self.symbol_origins.clone(), - self.next_call_site_slot, - ); - nested_compiler.current.arity = params.len() as u8; - - nested_compiler.begin_scope(); - - for param in params { - let reg = nested_compiler.alloc_register()?; - let resolved_type = aelys_sema::ResolvedType::from_infer_type(¶m.ty); - nested_compiler.add_local(param.name.clone(), param.mutable, reg, resolved_type); - } - - let result_reg = nested_compiler.alloc_register()?; - nested_compiler.compile_typed_expr(body, result_reg)?; - nested_compiler.emit_a(OpCode::Return, result_reg, 0, 0, body.span); - - nested_compiler.end_scope(); - nested_compiler.current.num_registers = nested_compiler.next_register; - nested_compiler.current.global_layout = nested_compiler.build_global_layout(); - nested_compiler.current.compute_global_layout_hash(); - - self.mark_captures_from_nested(&nested_compiler); - - let mut compiled_func = nested_compiler.current.clone(); - let mut nested_upvalues = nested_compiler.upvalues.clone(); - - self.fix_transitive_captures(&mut nested_upvalues); - - for upvalue in &nested_upvalues { - compiled_func - .upvalue_descriptors - .push(aelys_bytecode::UpvalueDescriptor { - is_local: upvalue.is_local, - index: upvalue.index, - }); - } - - let remap = self.heap.merge(&mut nested_compiler.heap); - compiled_func.remap_constants(&remap); - - for (name, idx) in &nested_compiler.global_indices { - if !self.global_indices.contains_key(name) { - self.global_indices.insert(name.clone(), *idx); - } 
- } - self.next_global_index = nested_compiler.next_global_index; - - if nested_compiler.next_call_site_slot > self.next_call_site_slot { - self.next_call_site_slot = nested_compiler.next_call_site_slot; - } - - let func_ref = self.heap.alloc_function(compiled_func); - let const_idx = self.add_constant(Value::ptr(func_ref.index()), span)?; - - if nested_upvalues.is_empty() { - self.emit_b(OpCode::LoadK, dest, const_idx as i16, span); - } else { - self.emit_a( - OpCode::MakeClosure, - dest, - const_idx as u8, - nested_upvalues.len() as u8, - span, - ); - - for upval in &nested_upvalues { - self.current - .push_raw(((upval.is_local as u32) << 8) | (upval.index as u32)); - } - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/lambda_stmts.rs b/backend/src/compiler/expr/typed/lambda_stmts.rs deleted file mode 100644 index de0bc60..0000000 --- a/backend/src/compiler/expr/typed/lambda_stmts.rs +++ /dev/null @@ -1,159 +0,0 @@ -use super::super::super::Upvalue; -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_lambda_with_stmts( - &mut self, - params: &[aelys_sema::TypedParam], - body: &[aelys_sema::TypedStmt], - captures: &[(String, aelys_sema::InferType)], - dest: u8, - span: Span, - ) -> Result<()> { - let mut nested_compiler = super::super::Compiler::for_nested_function( - Some("".to_string()), - self.source.clone(), - self.heap.clone(), - self.globals.clone(), - self.global_indices.clone(), - self.next_global_index, - self.locals.clone(), - self.upvalues.clone(), - self.all_enclosing_locals.clone(), - self.module_aliases.clone(), - self.known_globals.clone(), - self.known_native_globals.clone(), - self.symbol_origins.clone(), - self.next_call_site_slot, - ); - nested_compiler.current.arity = params.len() as u8; - - #[allow(clippy::collapsible_if)] - for (capture_name, _capture_ty) in captures { - if let Some((local_reg, mutable, _resolved_ty)) = 
- self.resolve_variable_typed(capture_name) - { - if !nested_compiler - .upvalues - .iter() - .any(|uv| uv.name == *capture_name) - { - self.mark_local_captured(local_reg); - nested_compiler.upvalues.push(Upvalue { - is_local: true, - index: local_reg, - name: capture_name.clone(), - mutable, - }); - } - continue; - } - - let _ = self.resolve_upvalue(capture_name); - - if let Some((upvalue_idx, _)) = self - .upvalues - .iter() - .enumerate() - .find(|(_, uv)| uv.name == *capture_name) - && !nested_compiler - .upvalues - .iter() - .any(|uv| uv.name == *capture_name) - { - nested_compiler.upvalues.push(Upvalue { - is_local: false, - index: upvalue_idx as u8, - name: capture_name.clone(), - mutable: self.upvalues[upvalue_idx].mutable, - }); - } - } - - nested_compiler.begin_scope(); - - for param in params { - let reg = nested_compiler.alloc_register()?; - let resolved_type = aelys_sema::ResolvedType::from_infer_type(¶m.ty); - nested_compiler.add_local(param.name.clone(), param.mutable, reg, resolved_type); - } - - if !body.is_empty() { - for stmt in &body[..body.len() - 1] { - nested_compiler.compile_typed_stmt(stmt)?; - } - - let last_stmt = &body[body.len() - 1]; - match &last_stmt.kind { - aelys_sema::TypedStmtKind::Expression(expr) => { - let result_reg = nested_compiler.alloc_register()?; - nested_compiler.compile_typed_expr(expr, result_reg)?; - nested_compiler.emit_a(OpCode::Return, result_reg, 0, 0, last_stmt.span); - } - aelys_sema::TypedStmtKind::Return(_) => { - nested_compiler.compile_typed_stmt(last_stmt)?; - } - _ => { - nested_compiler.compile_typed_stmt(last_stmt)?; - nested_compiler.emit_return0(last_stmt.span); - } - } - } else { - nested_compiler.emit_return0(span); - } - - nested_compiler.end_scope(); - nested_compiler.current.num_registers = nested_compiler.next_register; - nested_compiler.current.global_layout = nested_compiler.build_global_layout(); - nested_compiler.current.compute_global_layout_hash(); - - 
self.mark_captures_from_nested(&nested_compiler); - - let mut compiled_func = nested_compiler.current.clone(); - let mut nested_upvalues = nested_compiler.upvalues.clone(); - - self.fix_transitive_captures(&mut nested_upvalues); - - for upvalue in &nested_upvalues { - compiled_func - .upvalue_descriptors - .push(aelys_bytecode::UpvalueDescriptor { - is_local: upvalue.is_local, - index: upvalue.index, - }); - } - - let remap = self.heap.merge(&mut nested_compiler.heap); - compiled_func.remap_constants(&remap); - - for (name, idx) in &nested_compiler.global_indices { - if !self.global_indices.contains_key(name) { - self.global_indices.insert(name.clone(), *idx); - } - } - self.next_global_index = nested_compiler.next_global_index; - - if nested_compiler.next_call_site_slot > self.next_call_site_slot { - self.next_call_site_slot = nested_compiler.next_call_site_slot; - } - - let const_idx = self.current.add_constant_function(compiled_func); - - if nested_upvalues.is_empty() { - self.emit_b(OpCode::LoadK, dest, const_idx as i16, span); - } else { - self.emit_a( - OpCode::MakeClosure, - dest, - const_idx as u8, - nested_upvalues.len() as u8, - span, - ); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/logic.rs b/backend/src/compiler/expr/typed/logic.rs deleted file mode 100644 index 2879e69..0000000 --- a/backend/src/compiler/expr/typed/logic.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_and( - &mut self, - left: &aelys_sema::TypedExpr, - right: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - self.compile_typed_expr(left, dest)?; - let jump_false = self.emit_jump_if(OpCode::JumpIfNot, dest, span); - self.compile_typed_expr(right, dest)?; - self.patch_jump(jump_false); - Ok(()) - } - - pub(super) fn compile_typed_or( - &mut self, - left: &aelys_sema::TypedExpr, - right: 
&aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - self.compile_typed_expr(left, dest)?; - let jump_true = self.emit_jump_if(OpCode::JumpIf, dest, span); - self.compile_typed_expr(right, dest)?; - self.patch_jump(jump_true); - Ok(()) - } -} diff --git a/backend/src/compiler/expr/typed/member.rs b/backend/src/compiler/expr/typed/member.rs deleted file mode 100644 index fc99ed8..0000000 --- a/backend/src/compiler/expr/typed/member.rs +++ /dev/null @@ -1,27 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; -use aelys_syntax::Span; - -impl Compiler { - pub(super) fn compile_typed_member_access( - &mut self, - object: &aelys_sema::TypedExpr, - member: &str, - dest: u8, - span: Span, - ) -> Result<()> { - use aelys_sema::TypedExprKind; - - if let TypedExprKind::Identifier(module_name) = &object.kind - && self.module_aliases.contains(module_name) - { - let global_name = format!("{}::{}", module_name, member); - let idx = self.get_or_create_global_index(&global_name); - self.accessed_globals.insert(global_name.clone()); - self.emit_b(aelys_bytecode::OpCode::GetGlobalIdx, dest, idx as i16, span); - return Ok(()); - } - - self.compile_identifier(member, dest, span) - } -} diff --git a/backend/src/compiler/expr/typed/mod.rs b/backend/src/compiler/expr/typed/mod.rs deleted file mode 100644 index 364ff97..0000000 --- a/backend/src/compiler/expr/typed/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -mod array; -mod assign; -mod binary; -mod call; -mod call_helpers; -mod control; -mod dispatch; -mod fmt_string; -mod lambda; -mod lambda_stmts; -mod logic; -mod member; -mod unary; diff --git a/backend/src/compiler/expr/typed/unary.rs b/backend/src/compiler/expr/typed/unary.rs deleted file mode 100644 index 204dcdd..0000000 --- a/backend/src/compiler/expr/typed/unary.rs +++ /dev/null @@ -1,26 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::UnaryOp; - -impl Compiler 
{ - pub(super) fn compile_typed_unary( - &mut self, - op: UnaryOp, - operand: &aelys_sema::TypedExpr, - dest: u8, - span: Span, - ) -> Result<()> { - self.compile_typed_expr(operand, dest)?; - - let opcode = match op { - UnaryOp::Neg => OpCode::Neg, - UnaryOp::Not => OpCode::Not, - UnaryOp::BitNot => OpCode::BitNot, - }; - self.emit_a(opcode, dest, dest, 0, span); - - Ok(()) - } -} diff --git a/backend/src/compiler/expr/unary.rs b/backend/src/compiler/expr/unary.rs deleted file mode 100644 index c8dce28..0000000 --- a/backend/src/compiler/expr/unary.rs +++ /dev/null @@ -1,31 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, UnaryOp}; - -impl Compiler { - // could probably optimize -literal to just negate at compile time... - pub fn compile_unary( - &mut self, - op: UnaryOp, - operand: &Expr, - dest: u8, - span: Span, - ) -> Result<()> { - let operand_reg = self.alloc_register()?; - - self.compile_expr(operand, operand_reg)?; - - let opcode = match op { - UnaryOp::Neg => OpCode::Neg, - UnaryOp::Not => OpCode::Not, - UnaryOp::BitNot => OpCode::BitNot, - }; - - self.emit_a(opcode, dest, operand_reg, 0, span); - self.free_register(operand_reg); - - Ok(()) - } -} diff --git a/backend/src/compiler/functions/captures.rs b/backend/src/compiler/functions/captures.rs deleted file mode 100644 index 4da6415..0000000 --- a/backend/src/compiler/functions/captures.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::super::{Compiler, Upvalue}; - -impl Compiler { - pub fn mark_captures_from_nested(&mut self, nested: &Compiler) { - for upvalue in &nested.upvalues { - if upvalue.is_local { - self.mark_local_captured(upvalue.index); - } else if upvalue.index & 0x80 != 0 { - let _resolved = self.resolve_upvalue(&upvalue.name); - } - } - } - - pub fn fix_transitive_captures(&self, nested_upvalues: &mut [Upvalue]) { - for upvalue in nested_upvalues.iter_mut() { - if !upvalue.is_local - && upvalue.index 
& 0x80 != 0 - && let Some((idx, _)) = self - .upvalues - .iter() - .enumerate() - .find(|(_, u)| u.name == upvalue.name) - { - upvalue.index = idx as u8; - } - } - } -} diff --git a/backend/src/compiler/functions/mod.rs b/backend/src/compiler/functions/mod.rs deleted file mode 100644 index 3c3ea88..0000000 --- a/backend/src/compiler/functions/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -mod captures; -mod typed; -mod typed_body; -mod typed_finalize; -mod typed_setup; -mod untyped; -mod untyped_body; -mod untyped_finalize; diff --git a/backend/src/compiler/functions/typed.rs b/backend/src/compiler/functions/typed.rs deleted file mode 100644 index 54f79b2..0000000 --- a/backend/src/compiler/functions/typed.rs +++ /dev/null @@ -1,16 +0,0 @@ -use super::super::Compiler; -use super::typed_body::compile_typed_body; -use super::typed_finalize::finalize_typed_function; -use super::typed_setup::setup_typed_function; -use aelys_common::Result; - -impl Compiler { - pub fn compile_typed_function(&mut self, func: &aelys_sema::TypedFunction) -> Result<()> { - let setup = setup_typed_function(self, func)?; - let mut nested_compiler = setup.nested_compiler; - - compile_typed_body(&mut nested_compiler, func, setup.has_no_gc)?; - - finalize_typed_function(self, nested_compiler, func, setup.func_var_reg) - } -} diff --git a/backend/src/compiler/functions/typed_body.rs b/backend/src/compiler/functions/typed_body.rs deleted file mode 100644 index 74d1c5d..0000000 --- a/backend/src/compiler/functions/typed_body.rs +++ /dev/null @@ -1,94 +0,0 @@ -use super::super::Compiler; -use super::super::liveness::LivenessAnalysis; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_sema::{TypedFunction, TypedStmtKind}; -use std::collections::HashSet; - -pub(super) fn compile_typed_body( - nested_compiler: &mut Compiler, - func: &TypedFunction, - has_no_gc: bool, -) -> Result<()> { - nested_compiler.begin_scope(); - - for param in &func.params { - let reg = nested_compiler.alloc_register()?; 
- let resolved_type = aelys_sema::ResolvedType::from_infer_type(¶m.ty); - nested_compiler.add_local(param.name.clone(), param.mutable, reg, resolved_type); - } - - if has_no_gc { - nested_compiler.emit_a(OpCode::EnterNoGc, 0, 0, 0, func.span); - } - - let liveness = LivenessAnalysis::analyze_function(func); - - if func.body.is_empty() { - if has_no_gc { - nested_compiler.emit_a(OpCode::ExitNoGc, 0, 0, 0, func.span); - } - nested_compiler.emit_a(OpCode::Return0, 0, 0, 0, func.span); - nested_compiler.end_scope(); - return Ok(()); - } - - let last_idx = func.body.len() - 1; - let mut already_freed = HashSet::new(); - for (stmt_idx, stmt) in func.body[..last_idx].iter().enumerate() { - nested_compiler.compile_typed_stmt(stmt)?; - nested_compiler.free_dead_locals(stmt_idx, &liveness, &mut already_freed); - } - - let last_stmt = &func.body[last_idx]; - - let implicit_return_reg = match &last_stmt.kind { - TypedStmtKind::Expression(expr) => { - let result_reg = nested_compiler.alloc_register()?; - nested_compiler.compile_typed_expr(expr, result_reg)?; - Some(result_reg) - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } if else_branch.is_some() => { - let result_reg = nested_compiler.alloc_register()?; - let cond_reg = nested_compiler.alloc_register()?; - - nested_compiler.compile_typed_expr(condition, cond_reg)?; - let jump_to_else = - nested_compiler.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - nested_compiler.free_register(cond_reg); - - nested_compiler.compile_typed_if_branch_for_return(then_branch, result_reg)?; - let jump_to_end = nested_compiler.emit_jump(OpCode::Jump, then_branch.span); - nested_compiler.patch_jump(jump_to_else); - - if let Some(else_branch) = else_branch.as_ref() { - nested_compiler.compile_typed_if_branch_for_return(else_branch, result_reg)?; - } - nested_compiler.patch_jump(jump_to_end); - - Some(result_reg) - } - _ => { - nested_compiler.compile_typed_stmt(last_stmt)?; - 
nested_compiler.free_dead_locals(last_idx, &liveness, &mut already_freed); - None - } - }; - - if has_no_gc { - nested_compiler.emit_a(OpCode::ExitNoGc, 0, 0, 0, func.span); - } - - if let Some(result_reg) = implicit_return_reg { - nested_compiler.emit_a(OpCode::Return, result_reg, 0, 0, func.span); - } else { - nested_compiler.emit_a(OpCode::Return0, 0, 0, 0, func.span); - } - - nested_compiler.end_scope(); - Ok(()) -} diff --git a/backend/src/compiler/functions/typed_finalize.rs b/backend/src/compiler/functions/typed_finalize.rs deleted file mode 100644 index 052ef53..0000000 --- a/backend/src/compiler/functions/typed_finalize.rs +++ /dev/null @@ -1,65 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::{OpCode, UpvalueDescriptor}; -use aelys_common::Result; -use aelys_sema::TypedFunction; - -pub(super) fn finalize_typed_function( - parent: &mut Compiler, - mut nested_compiler: Compiler, - func: &TypedFunction, - func_var_reg: u8, -) -> Result<()> { - nested_compiler.current.num_registers = nested_compiler.next_register; - nested_compiler.current.global_layout = nested_compiler.build_global_layout(); - nested_compiler.current.compute_global_layout_hash(); - nested_compiler.current.finalize_bytecode(); - - parent.mark_captures_from_nested(&nested_compiler); - - let mut compiled_func = nested_compiler.current.clone(); - let mut nested_upvalues = nested_compiler.upvalues.clone(); - parent.fix_transitive_captures(&mut nested_upvalues); - - for upvalue in &nested_upvalues { - compiled_func.upvalue_descriptors.push(UpvalueDescriptor { - is_local: upvalue.is_local, - index: upvalue.index, - }); - } - - let remap = parent.heap.merge(&mut nested_compiler.heap); - compiled_func.remap_constants(&remap); - - for (name, idx) in &nested_compiler.global_indices { - if !parent.global_indices.contains_key(name) { - parent.global_indices.insert(name.clone(), *idx); - } - } - parent.next_global_index = nested_compiler.next_global_index; - - if 
nested_compiler.next_call_site_slot > parent.next_call_site_slot { - parent.next_call_site_slot = nested_compiler.next_call_site_slot; - } - - let const_idx = parent.current.add_constant_function(compiled_func); - - if nested_upvalues.is_empty() { - parent.emit_b(OpCode::LoadK, func_var_reg, const_idx as i16, func.span); - } else { - parent.emit_a( - OpCode::MakeClosure, - func_var_reg, - const_idx as u8, - nested_upvalues.len() as u8, - func.span, - ); - } - - if parent.scope_depth == 0 { - let idx = parent.get_or_create_global_index(&func.name); - parent.accessed_globals.insert(func.name.clone()); - parent.emit_b(OpCode::SetGlobalIdx, func_var_reg, idx as i16, func.span); - } - - Ok(()) -} diff --git a/backend/src/compiler/functions/typed_setup.rs b/backend/src/compiler/functions/typed_setup.rs deleted file mode 100644 index 74c54aa..0000000 --- a/backend/src/compiler/functions/typed_setup.rs +++ /dev/null @@ -1,110 +0,0 @@ -use super::super::{Compiler, Upvalue}; -use aelys_common::Result; -use aelys_sema::{ResolvedType, TypedFunction}; - -pub(super) struct TypedFunctionSetup { - pub(super) func_var_reg: u8, - pub(super) has_no_gc: bool, - pub(super) nested_compiler: Compiler, -} - -pub(super) fn setup_typed_function( - parent: &mut Compiler, - func: &TypedFunction, -) -> Result { - let has_no_gc = func.decorators.iter().any(|d| d.name == "no_gc"); - let func_var_reg = parent.alloc_register()?; - - if parent.scope_depth == 0 { - parent.globals.insert(func.name.clone(), false); - if !parent.global_indices.contains_key(&func.name) { - let idx = parent.next_global_index; - parent.global_indices.insert(func.name.clone(), idx); - parent.next_global_index += 1; - } - } else { - let params_resolved: Vec<_> = func - .params - .iter() - .map(|p| ResolvedType::from_infer_type(&p.ty)) - .collect(); - let ret_resolved = ResolvedType::from_infer_type(&func.return_type); - - parent.add_local( - func.name.clone(), - false, - func_var_reg, - ResolvedType::Function { - params: 
params_resolved, - ret: Box::new(ret_resolved), - }, - ); - } - - let mut nested_compiler = Compiler::for_nested_function( - Some(func.name.clone()), - parent.source.clone(), - parent.heap.clone(), - parent.globals.clone(), - parent.global_indices.clone(), - parent.next_global_index, - parent.locals.clone(), - parent.upvalues.clone(), - parent.all_enclosing_locals.clone(), - parent.module_aliases.clone(), - parent.known_globals.clone(), - parent.known_native_globals.clone(), - parent.symbol_origins.clone(), - parent.next_call_site_slot, - ); - nested_compiler.current.arity = func.params.len() as u8; - nested_compiler.has_no_gc = has_no_gc; - - #[allow(clippy::collapsible_if)] - for (capture_name, _capture_ty) in &func.captures { - if let Some((local_reg, mutable, _resolved_ty)) = - parent.resolve_variable_typed(capture_name) - { - if !nested_compiler - .upvalues - .iter() - .any(|uv| uv.name == *capture_name) - { - parent.mark_local_captured(local_reg); - nested_compiler.upvalues.push(Upvalue { - is_local: true, - index: local_reg, - name: capture_name.clone(), - mutable, - }); - } - continue; - } - - let _ = parent.resolve_upvalue(capture_name); - - if let Some((upvalue_idx, _)) = parent - .upvalues - .iter() - .enumerate() - .find(|(_, uv)| uv.name == *capture_name) - && !nested_compiler - .upvalues - .iter() - .any(|uv| uv.name == *capture_name) - { - nested_compiler.upvalues.push(Upvalue { - is_local: false, - index: upvalue_idx as u8, - name: capture_name.clone(), - mutable: parent.upvalues[upvalue_idx].mutable, - }); - } - } - - Ok(TypedFunctionSetup { - func_var_reg, - has_no_gc, - nested_compiler, - }) -} diff --git a/backend/src/compiler/functions/untyped.rs b/backend/src/compiler/functions/untyped.rs deleted file mode 100644 index 5747bbd..0000000 --- a/backend/src/compiler/functions/untyped.rs +++ /dev/null @@ -1,78 +0,0 @@ -use super::super::Compiler; -use super::untyped_body::compile_untyped_body; -use 
super::untyped_finalize::finalize_untyped_function; -use aelys_bytecode::{Heap, OpCode}; -use aelys_common::Result; - -impl Compiler { - pub fn compile_function(&mut self, func: &aelys_syntax::ast::Function) -> Result<()> { - let func_var_reg = self.declare_variable(&func.name, false)?; - - if self.scope_depth == 0 { - self.globals.insert(func.name.clone(), false); - if !self.global_indices.contains_key(&func.name) { - let idx = self.next_global_index; - self.global_indices.insert(func.name.clone(), idx); - self.next_global_index += 1; - } - } - - let has_no_gc = func.decorators.iter().any(|d| d.name == "no_gc"); - - let heap = std::mem::replace(&mut self.heap, Heap::new()); - let globals = self.globals.clone(); - let global_indices = self.global_indices.clone(); - let enclosing_locals = self.locals.clone(); - let enclosing_upvalues = self.upvalues.clone(); - - let mut func_compiler = Compiler::for_nested_function( - Some(func.name.clone()), - self.source.clone(), - heap, - globals, - global_indices, - self.next_global_index, - enclosing_locals, - enclosing_upvalues, - self.all_enclosing_locals.clone(), - self.module_aliases.clone(), - self.known_globals.clone(), - self.known_native_globals.clone(), - self.symbol_origins.clone(), - self.next_call_site_slot, - ); - - func_compiler.has_no_gc = has_no_gc; - func_compiler.begin_scope(); - - for param in &func.params { - func_compiler.declare_variable(¶m.name, false)?; - } - - if has_no_gc { - let line = func_compiler.current_line(func.span); - func_compiler - .current - .emit_a(OpCode::EnterNoGc, 0, 0, 0, line); - } - - let body_result = compile_untyped_body(&mut func_compiler, func, has_no_gc)?; - - func_compiler.end_scope(); - - if !body_result.returned { - if has_no_gc { - let line = func_compiler.current_line(func.span); - func_compiler - .current - .emit_a(OpCode::ExitNoGc, 0, 0, 0, line); - } - func_compiler.emit_return0(func.span); - } - - func_compiler.current.num_registers = func_compiler.next_register; - 
func_compiler.current.arity = func.params.len() as u8; - - finalize_untyped_function(self, func_compiler, &func.name, func.span, func_var_reg) - } -} diff --git a/backend/src/compiler/functions/untyped_body.rs b/backend/src/compiler/functions/untyped_body.rs deleted file mode 100644 index f82ab16..0000000 --- a/backend/src/compiler/functions/untyped_body.rs +++ /dev/null @@ -1,94 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::ast::{Function, StmtKind}; - -pub(super) struct UntypedBodyResult { - pub(super) returned: bool, -} - -impl UntypedBodyResult { - fn returned() -> Self { - Self { returned: true } - } - - fn not_returned() -> Self { - Self { returned: false } - } -} - -pub(super) fn compile_untyped_body( - func_compiler: &mut Compiler, - func: &Function, - has_no_gc: bool, -) -> Result { - if func.body.is_empty() { - return Ok(UntypedBodyResult::not_returned()); - } - - for stmt in &func.body[..func.body.len() - 1] { - func_compiler.compile_stmt(stmt)?; - } - - let last_stmt = &func.body[func.body.len() - 1]; - - match &last_stmt.kind { - StmtKind::Expression(expr) => { - let result_reg = func_compiler.alloc_register()?; - func_compiler.compile_expr(expr, result_reg)?; - - if has_no_gc { - let line = func_compiler.current_line(func.span); - func_compiler - .current - .emit_a(OpCode::ExitNoGc, 0, 0, 0, line); - } - - let line = func_compiler.current_line(last_stmt.span); - func_compiler - .current - .emit_a(OpCode::Return, result_reg, 0, 0, line); - Ok(UntypedBodyResult::returned()) - } - StmtKind::If { - condition, - then_branch, - else_branch, - } if else_branch.is_some() => { - let result_reg = func_compiler.alloc_register()?; - let cond_reg = func_compiler.alloc_register()?; - - func_compiler.compile_expr(condition, cond_reg)?; - let jump_to_else = - func_compiler.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - func_compiler.free_register(cond_reg); - - 
func_compiler.compile_if_branch_for_return(then_branch, result_reg)?; - let jump_to_end = func_compiler.emit_jump(OpCode::Jump, then_branch.span); - func_compiler.patch_jump(jump_to_else); - - if let Some(else_branch) = else_branch.as_ref() { - func_compiler.compile_if_branch_for_return(else_branch, result_reg)?; - } - func_compiler.patch_jump(jump_to_end); - - if has_no_gc { - let line = func_compiler.current_line(func.span); - func_compiler - .current - .emit_a(OpCode::ExitNoGc, 0, 0, 0, line); - } - - let line = func_compiler.current_line(last_stmt.span); - func_compiler - .current - .emit_a(OpCode::Return, result_reg, 0, 0, line); - - Ok(UntypedBodyResult::returned()) - } - _ => { - func_compiler.compile_stmt(last_stmt)?; - Ok(UntypedBodyResult::not_returned()) - } - } -} diff --git a/backend/src/compiler/functions/untyped_finalize.rs b/backend/src/compiler/functions/untyped_finalize.rs deleted file mode 100644 index f0748d7..0000000 --- a/backend/src/compiler/functions/untyped_finalize.rs +++ /dev/null @@ -1,99 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::{GlobalLayout, OpCode, UpvalueDescriptor, Value}; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; -use std::sync::Arc; - -pub(super) fn finalize_untyped_function( - parent: &mut Compiler, - mut func_compiler: Compiler, - func_name: &str, - func_span: Span, - func_var_reg: u8, -) -> Result<()> { - func_compiler.current.global_layout = build_untyped_global_layout(&func_compiler); - func_compiler.current.compute_global_layout_hash(); - func_compiler.current.finalize_bytecode(); - - parent.mark_captures_from_nested(&func_compiler); - parent.fix_transitive_captures(&mut func_compiler.upvalues); - - for upvalue in &func_compiler.upvalues { - func_compiler - .current - .upvalue_descriptors - .push(UpvalueDescriptor { - is_local: upvalue.is_local, - index: upvalue.index, - }); - } - - let compiled_func = func_compiler.current; - let 
upvalue_count = func_compiler.upvalues.len(); - if upvalue_count > 255 { - return Err(CompileError::new( - CompileErrorKind::TooManyUpvalues, - func_span, - parent.source.clone(), - ) - .into()); - } - - parent.heap = func_compiler.heap; - - for (name, &idx) in &func_compiler.global_indices { - if !parent.global_indices.contains_key(name) { - parent.global_indices.insert(name.clone(), idx); - } - } - if func_compiler.next_global_index > parent.next_global_index { - parent.next_global_index = func_compiler.next_global_index; - } - if func_compiler.next_call_site_slot > parent.next_call_site_slot { - parent.next_call_site_slot = func_compiler.next_call_site_slot; - } - - let const_idx = parent.current.add_constant_function(compiled_func); - - if upvalue_count > 0 { - parent.emit_a( - OpCode::MakeClosure, - func_var_reg, - const_idx as u8, - upvalue_count as u8, - func_span, - ); - } else { - parent.emit_b(OpCode::LoadK, func_var_reg, const_idx as i16, func_span); - } - - if let Some(&idx) = parent.global_indices.get(func_name) { - parent.accessed_globals.insert(func_name.to_string()); - parent.emit_b(OpCode::SetGlobalIdx, func_var_reg, idx as i16, func_span); - } else { - let name_ref = parent.heap.intern_string(func_name); - let name_const_idx = parent.add_constant(Value::ptr(name_ref.index()), func_span)?; - parent.emit_a( - OpCode::SetGlobal, - func_var_reg, - name_const_idx as u8, - 0, - func_span, - ); - } - - Ok(()) -} - -fn build_untyped_global_layout(compiler: &Compiler) -> Arc { - if compiler.accessed_globals.is_empty() { - GlobalLayout::empty() - } else { - let mut names = vec![String::new(); compiler.next_global_index as usize]; - for (name, &idx) in &compiler.global_indices { - names[idx as usize] = name.clone(); - } - GlobalLayout::new(names) - } -} diff --git a/backend/src/compiler/globals.rs b/backend/src/compiler/globals.rs deleted file mode 100644 index 919e568..0000000 --- a/backend/src/compiler/globals.rs +++ /dev/null @@ -1,33 +0,0 @@ -use 
super::Compiler; - -impl Compiler { - /// Get or create global index, translating imported names to qualified names. - /// Use for function calls to direct imports. - pub fn get_or_create_global_index(&mut self, name: &str) -> u16 { - let actual_name = self.resolve_global_name(name).to_string(); - self.get_or_create_global_index_raw(&actual_name) - } - - /// Get or create global index without name translation. - /// Use for variable declarations and assignments. - pub fn get_or_create_global_index_raw(&mut self, name: &str) -> u16 { - if let Some(&idx) = self.global_indices.get(name) { - idx - } else { - let idx = self.next_global_index; - self.global_indices.insert(name.to_string(), idx); - self.next_global_index += 1; - idx - } - } - - pub fn resolve_global_name<'a>(&'a self, name: &'a str) -> &'a str { - // Only translate to qualified name if it's a stdlib import (contains ::) - // Custom module entries contain module path (like "mod_a") not qualified names - self.symbol_origins - .get(name) - .filter(|origin| origin.contains("::")) - .map(String::as_str) - .unwrap_or(name) - } -} diff --git a/backend/src/compiler/lambda/compile.rs b/backend/src/compiler/lambda/compile.rs deleted file mode 100644 index c887a6c..0000000 --- a/backend/src/compiler/lambda/compile.rs +++ /dev/null @@ -1,80 +0,0 @@ -use super::super::Compiler; -use super::finalize::finalize_lambda; -use aelys_bytecode::{Heap, OpCode}; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Parameter, Stmt, StmtKind}; - -impl Compiler { - pub fn compile_lambda( - &mut self, - params: &[Parameter], - body: &[Stmt], - dest: u8, - span: Span, - ) -> Result<()> { - let heap = std::mem::replace(&mut self.heap, Heap::new()); - let globals = self.globals.clone(); - let global_indices = self.global_indices.clone(); - let enclosing_locals = self.locals.clone(); - let enclosing_upvalues = self.upvalues.clone(); - - let mut lambda_compiler = Compiler::for_nested_function( - None, - 
self.source.clone(), - heap, - globals, - global_indices, - self.next_global_index, - enclosing_locals, - enclosing_upvalues, - self.all_enclosing_locals.clone(), - self.module_aliases.clone(), - self.known_globals.clone(), - self.known_native_globals.clone(), - self.symbol_origins.clone(), - self.next_call_site_slot, - ); - - lambda_compiler.begin_scope(); - - for param in params { - lambda_compiler.declare_variable(¶m.name, false)?; - } - - compile_lambda_body(&mut lambda_compiler, body, span)?; - - lambda_compiler.end_scope(); - lambda_compiler.current.num_registers = lambda_compiler.next_register; - lambda_compiler.current.arity = params.len() as u8; - - finalize_lambda(self, lambda_compiler, dest, span) - } -} - -fn compile_lambda_body(lambda_compiler: &mut Compiler, body: &[Stmt], span: Span) -> Result<()> { - if body.is_empty() { - lambda_compiler.emit_return0(span); - return Ok(()); - } - - for stmt in &body[..body.len() - 1] { - lambda_compiler.compile_stmt(stmt)?; - } - - let last_stmt = &body[body.len() - 1]; - - match &last_stmt.kind { - StmtKind::Expression(expr) => { - let result_reg = lambda_compiler.alloc_register()?; - lambda_compiler.compile_expr(expr, result_reg)?; - lambda_compiler.emit_a(OpCode::Return, result_reg, 0, 0, last_stmt.span); - } - _ => { - lambda_compiler.compile_stmt(last_stmt)?; - lambda_compiler.emit_return0(span); - } - } - - Ok(()) -} diff --git a/backend/src/compiler/lambda/finalize.rs b/backend/src/compiler/lambda/finalize.rs deleted file mode 100644 index 8d65528..0000000 --- a/backend/src/compiler/lambda/finalize.rs +++ /dev/null @@ -1,88 +0,0 @@ -use super::super::Compiler; -use aelys_bytecode::{OpCode, UpvalueDescriptor}; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -pub(super) fn finalize_lambda( - parent: &mut Compiler, - mut lambda_compiler: Compiler, - dest: u8, - span: Span, -) -> Result<()> { - let global_layout = if 
lambda_compiler.accessed_globals.is_empty() { - aelys_bytecode::GlobalLayout::empty() - } else { - let mut names = vec![String::new(); lambda_compiler.next_global_index as usize]; - for (name, &idx) in &lambda_compiler.global_indices { - names[idx as usize] = name.clone(); - } - aelys_bytecode::GlobalLayout::new(names) - }; - lambda_compiler.current.global_layout = global_layout; - lambda_compiler.current.compute_global_layout_hash(); - lambda_compiler.current.finalize_bytecode(); - - parent.mark_captures_from_nested(&lambda_compiler); - parent.fix_transitive_captures(&mut lambda_compiler.upvalues); - - for upvalue in &lambda_compiler.upvalues { - lambda_compiler - .current - .upvalue_descriptors - .push(UpvalueDescriptor { - is_local: upvalue.is_local, - index: upvalue.index, - }); - } - - let compiled_func = lambda_compiler.current; - let upvalue_count = lambda_compiler.upvalues.len(); - if upvalue_count > 255 { - return Err(CompileError::new( - CompileErrorKind::TooManyUpvalues, - span, - parent.source.clone(), - ) - .into()); - } - - parent.heap = lambda_compiler.heap; - - for (name, &idx) in &lambda_compiler.global_indices { - if !parent.global_indices.contains_key(name) { - parent.global_indices.insert(name.clone(), idx); - } - } - if lambda_compiler.next_global_index > parent.next_global_index { - parent.next_global_index = lambda_compiler.next_global_index; - } - - if lambda_compiler.next_call_site_slot > parent.next_call_site_slot { - parent.next_call_site_slot = lambda_compiler.next_call_site_slot; - } - - let const_idx = parent.current.add_constant_function(compiled_func); - if const_idx > u8::MAX as u16 { - return Err(CompileError::new( - CompileErrorKind::TooManyConstants, - span, - parent.source.clone(), - ) - .into()); - } - - if upvalue_count > 0 { - parent.emit_a( - OpCode::MakeClosure, - dest, - const_idx as u8, - upvalue_count as u8, - span, - ); - } else { - parent.emit_b(OpCode::LoadK, dest, const_idx as i16, span); - } - - Ok(()) -} diff --git 
a/backend/src/compiler/lambda/mod.rs b/backend/src/compiler/lambda/mod.rs deleted file mode 100644 index 068c043..0000000 --- a/backend/src/compiler/lambda/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod compile; -mod finalize; diff --git a/backend/src/compiler/liveness/analysis.rs b/backend/src/compiler/liveness/analysis.rs deleted file mode 100644 index 02dff2a..0000000 --- a/backend/src/compiler/liveness/analysis.rs +++ /dev/null @@ -1,42 +0,0 @@ -use super::{ControlFlowGraph, LivenessAnalysis}; -use aelys_sema::{TypedFunction, TypedStmt}; - -impl LivenessAnalysis { - pub fn analyze_function(func: &TypedFunction) -> Self { - let mut analysis = Self { - live_in: std::collections::HashMap::new(), - live_out: std::collections::HashMap::new(), - def: std::collections::HashMap::new(), - use_set: std::collections::HashMap::new(), - last_use_point: std::collections::HashMap::new(), - captured_vars: std::collections::HashSet::new(), - }; - - for (name, _) in &func.captures { - analysis.captured_vars.insert(name.clone()); - } - - let cfg = analysis.build_cfg(&func.body); - analysis.compute_def_use(&func.body); - analysis.compute_liveness(&cfg); - analysis.compute_last_use_points(&func.body); - - analysis - } - - fn build_cfg(&self, stmts: &[TypedStmt]) -> ControlFlowGraph { - super::cfg::build_cfg(stmts) - } - - fn compute_def_use(&mut self, stmts: &[TypedStmt]) { - super::def_use::compute_def_use(self, stmts) - } - - fn compute_liveness(&mut self, cfg: &ControlFlowGraph) { - super::liveness::compute_liveness(self, cfg) - } - - fn compute_last_use_points(&mut self, stmts: &[TypedStmt]) { - super::last_use::compute_last_use_points(self, stmts) - } -} diff --git a/backend/src/compiler/liveness/cfg.rs b/backend/src/compiler/liveness/cfg.rs deleted file mode 100644 index d2d7d58..0000000 --- a/backend/src/compiler/liveness/cfg.rs +++ /dev/null @@ -1,79 +0,0 @@ -use super::{BasicBlock, ControlFlowGraph}; -use aelys_sema::{TypedStmt, TypedStmtKind}; -use std::collections::HashMap; - 
-pub(super) fn build_cfg(stmts: &[TypedStmt]) -> ControlFlowGraph { - let mut cfg = ControlFlowGraph { - blocks: Vec::new(), - stmt_to_block: HashMap::new(), - entry: 0, - exits: Vec::new(), - }; - - if stmts.is_empty() { - let block = BasicBlock { - id: 0, - stmts: Vec::new(), - successors: Vec::new(), - predecessors: Vec::new(), - }; - cfg.blocks.push(block); - cfg.exits.push(0); - return cfg; - } - - let mut current_block_id: usize = 0; - let mut current_block_stmts: Vec = Vec::new(); - let mut current_block_predecessors: Vec = Vec::new(); - - for (idx, stmt) in stmts.iter().enumerate() { - current_block_stmts.push(idx); - cfg.stmt_to_block.insert(idx, current_block_id); - - let is_terminator = matches!( - stmt.kind, - TypedStmtKind::Return(_) - | TypedStmtKind::Break - | TypedStmtKind::Continue - | TypedStmtKind::If { .. } - | TypedStmtKind::While { .. } - | TypedStmtKind::For { .. } - | TypedStmtKind::ForEach { .. } - ); - - if is_terminator || idx == stmts.len() - 1 { - let block_id = current_block_id; - let block = BasicBlock { - id: block_id, - stmts: std::mem::take(&mut current_block_stmts), - successors: Vec::new(), - predecessors: std::mem::take(&mut current_block_predecessors), - }; - cfg.blocks.push(block); - - if idx < stmts.len() - 1 { - current_block_id = block_id + 1; - - if !matches!( - stmt.kind, - TypedStmtKind::Return(_) | TypedStmtKind::Break | TypedStmtKind::Continue - ) { - cfg.blocks[block_id].successors.push(block_id + 1); - current_block_predecessors.push(block_id); - } - } - } - } - - for block in &cfg.blocks { - if block.successors.is_empty() { - cfg.exits.push(block.id); - } - } - - if cfg.exits.is_empty() && !cfg.blocks.is_empty() { - cfg.exits.push(cfg.blocks.len() - 1); - } - - cfg -} diff --git a/backend/src/compiler/liveness/def_use.rs b/backend/src/compiler/liveness/def_use.rs deleted file mode 100644 index ffa7a10..0000000 --- a/backend/src/compiler/liveness/def_use.rs +++ /dev/null @@ -1,205 +0,0 @@ -use 
super::LivenessAnalysis; -use aelys_sema::{TypedExpr, TypedExprKind, TypedStmt, TypedStmtKind}; -use std::collections::HashSet; - -pub(super) fn compute_def_use(analysis: &mut LivenessAnalysis, stmts: &[TypedStmt]) { - for (idx, stmt) in stmts.iter().enumerate() { - let mut defs = HashSet::new(); - let mut uses = HashSet::new(); - - collect_def_use_stmt(analysis, stmt, &mut defs, &mut uses); - - analysis.def.insert(idx, defs); - analysis.use_set.insert(idx, uses); - } -} - -fn collect_def_use_stmt( - analysis: &mut LivenessAnalysis, - stmt: &TypedStmt, - defs: &mut HashSet, - uses: &mut HashSet, -) { - match &stmt.kind { - TypedStmtKind::Expression(expr) => { - collect_uses_expr(analysis, expr, uses); - } - TypedStmtKind::Let { - name, initializer, .. - } => { - collect_uses_expr(analysis, initializer, uses); - defs.insert(name.clone()); - } - TypedStmtKind::Block(stmts) => { - for s in stmts { - collect_def_use_stmt(analysis, s, defs, uses); - } - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } => { - collect_uses_expr(analysis, condition, uses); - collect_def_use_stmt(analysis, then_branch, defs, uses); - if let Some(else_b) = else_branch { - collect_def_use_stmt(analysis, else_b, defs, uses); - } - } - TypedStmtKind::While { condition, body } => { - collect_uses_expr(analysis, condition, uses); - collect_def_use_stmt(analysis, body, defs, uses); - } - TypedStmtKind::For { - iterator, - start, - end, - step, - body, - .. 
- } => { - collect_uses_expr(analysis, start, uses); - collect_uses_expr(analysis, end, uses); - if let Some(s) = &**step { - collect_uses_expr(analysis, s, uses); - } - defs.insert(iterator.clone()); - collect_def_use_stmt(analysis, body, defs, uses); - } - TypedStmtKind::Return(Some(expr)) => { - collect_uses_expr(analysis, expr, uses); - } - TypedStmtKind::Function(inner_func) => { - for (name, _) in &inner_func.captures { - uses.insert(name.clone()); - analysis.captured_vars.insert(name.clone()); - } - } - TypedStmtKind::ForEach { - iterator, - iterable, - body, - .. - } => { - collect_uses_expr(analysis, iterable, uses); - defs.insert(iterator.clone()); - collect_def_use_stmt(analysis, body, defs, uses); - } - TypedStmtKind::Return(None) - | TypedStmtKind::Break - | TypedStmtKind::Continue - | TypedStmtKind::Needs(_) - | TypedStmtKind::StructDecl { .. } => {} - } -} - -fn collect_uses_expr( - analysis: &mut LivenessAnalysis, - expr: &TypedExpr, - uses: &mut HashSet, -) { - match &expr.kind { - TypedExprKind::Identifier(name) => { - uses.insert(name.clone()); - } - TypedExprKind::Binary { left, right, .. } => { - collect_uses_expr(analysis, left, uses); - collect_uses_expr(analysis, right, uses); - } - TypedExprKind::Unary { operand, .. 
} => { - collect_uses_expr(analysis, operand, uses); - } - TypedExprKind::And { left, right } | TypedExprKind::Or { left, right } => { - collect_uses_expr(analysis, left, uses); - collect_uses_expr(analysis, right, uses); - } - TypedExprKind::Call { callee, args } => { - collect_uses_expr(analysis, callee, uses); - for arg in args { - collect_uses_expr(analysis, arg, uses); - } - } - TypedExprKind::Assign { name, value } => { - uses.insert(name.clone()); - collect_uses_expr(analysis, value, uses); - } - TypedExprKind::Grouping(inner) => { - collect_uses_expr(analysis, inner, uses); - } - TypedExprKind::If { - condition, - then_branch, - else_branch, - } => { - collect_uses_expr(analysis, condition, uses); - collect_uses_expr(analysis, then_branch, uses); - collect_uses_expr(analysis, else_branch, uses); - } - TypedExprKind::Lambda(inner) => { - collect_uses_expr(analysis, inner, uses); - } - TypedExprKind::LambdaInner { captures, .. } => { - for (name, _) in captures { - uses.insert(name.clone()); - analysis.captured_vars.insert(name.clone()); - } - } - TypedExprKind::Member { object, .. } => { - collect_uses_expr(analysis, object, uses); - } - TypedExprKind::ArrayLiteral { elements, .. } - | TypedExprKind::VecLiteral { elements, .. } => { - for elem in elements { - collect_uses_expr(analysis, elem, uses); - } - } - TypedExprKind::ArraySized { size, .. } => { - collect_uses_expr(analysis, size, uses); - } - TypedExprKind::Index { object, index } => { - collect_uses_expr(analysis, object, uses); - collect_uses_expr(analysis, index, uses); - } - TypedExprKind::IndexAssign { - object, - index, - value, - } => { - collect_uses_expr(analysis, object, uses); - collect_uses_expr(analysis, index, uses); - collect_uses_expr(analysis, value, uses); - } - TypedExprKind::Range { start, end, .. 
} => { - if let Some(s) = start { - collect_uses_expr(analysis, s, uses); - } - if let Some(e) = end { - collect_uses_expr(analysis, e, uses); - } - } - TypedExprKind::Slice { object, range } => { - collect_uses_expr(analysis, object, uses); - collect_uses_expr(analysis, range, uses); - } - TypedExprKind::FmtString(parts) => { - for part in parts { - if let aelys_sema::TypedFmtStringPart::Expr(e) = part { - collect_uses_expr(analysis, e, uses); - } - } - } - TypedExprKind::StructLiteral { fields, .. } => { - for (_, value) in fields { - collect_uses_expr(analysis, value, uses); - } - } - TypedExprKind::Cast { expr, .. } => { - collect_uses_expr(analysis, expr, uses); - } - TypedExprKind::Int(_) - | TypedExprKind::Float(_) - | TypedExprKind::Bool(_) - | TypedExprKind::String(_) - | TypedExprKind::Null => {} - } -} diff --git a/backend/src/compiler/liveness/last_use.rs b/backend/src/compiler/liveness/last_use.rs deleted file mode 100644 index b772fd0..0000000 --- a/backend/src/compiler/liveness/last_use.rs +++ /dev/null @@ -1,187 +0,0 @@ -use super::LivenessAnalysis; -use aelys_sema::{TypedExpr, TypedExprKind, TypedStmt, TypedStmtKind}; -use std::collections::HashSet; - -pub(super) fn compute_last_use_points(analysis: &mut LivenessAnalysis, stmts: &[TypedStmt]) { - for (idx, stmt) in stmts.iter().enumerate() { - let mut uses = HashSet::new(); - collect_all_uses_in_stmt(stmt, &mut uses); - - for var in uses { - if !analysis.captured_vars.contains(&var) { - analysis.last_use_point.insert(var, idx); - } - } - } -} - -fn collect_all_uses_in_stmt(stmt: &TypedStmt, uses: &mut HashSet) { - match &stmt.kind { - TypedStmtKind::Expression(expr) => { - collect_all_uses_in_expr(expr, uses); - } - TypedStmtKind::Let { initializer, .. 
} => { - collect_all_uses_in_expr(initializer, uses); - } - TypedStmtKind::Block(stmts) => { - for s in stmts { - collect_all_uses_in_stmt(s, uses); - } - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } => { - collect_all_uses_in_expr(condition, uses); - collect_all_uses_in_stmt(then_branch, uses); - if let Some(else_b) = else_branch { - collect_all_uses_in_stmt(else_b, uses); - } - } - TypedStmtKind::While { condition, body } => { - collect_all_uses_in_expr(condition, uses); - collect_all_uses_in_stmt(body, uses); - } - TypedStmtKind::For { - start, - end, - step, - body, - .. - } => { - collect_all_uses_in_expr(start, uses); - collect_all_uses_in_expr(end, uses); - if let Some(s) = &**step { - collect_all_uses_in_expr(s, uses); - } - collect_all_uses_in_stmt(body, uses); - } - TypedStmtKind::Return(Some(expr)) => { - collect_all_uses_in_expr(expr, uses); - } - TypedStmtKind::Function(inner_func) => { - for (name, _) in &inner_func.captures { - uses.insert(name.clone()); - } - } - TypedStmtKind::ForEach { iterable, body, .. } => { - collect_all_uses_in_expr(iterable, uses); - collect_all_uses_in_stmt(body, uses); - } - TypedStmtKind::Return(None) - | TypedStmtKind::Break - | TypedStmtKind::Continue - | TypedStmtKind::Needs(_) - | TypedStmtKind::StructDecl { .. } => {} - } -} - -fn collect_all_uses_in_expr(expr: &TypedExpr, uses: &mut HashSet) { - match &expr.kind { - TypedExprKind::Identifier(name) => { - uses.insert(name.clone()); - } - TypedExprKind::Binary { left, right, .. } => { - collect_all_uses_in_expr(left, uses); - collect_all_uses_in_expr(right, uses); - } - TypedExprKind::Unary { operand, .. 
} => { - collect_all_uses_in_expr(operand, uses); - } - TypedExprKind::And { left, right } | TypedExprKind::Or { left, right } => { - collect_all_uses_in_expr(left, uses); - collect_all_uses_in_expr(right, uses); - } - TypedExprKind::Call { callee, args } => { - collect_all_uses_in_expr(callee, uses); - for arg in args { - collect_all_uses_in_expr(arg, uses); - } - } - TypedExprKind::Assign { name, value } => { - uses.insert(name.clone()); - collect_all_uses_in_expr(value, uses); - } - TypedExprKind::Grouping(inner) => { - collect_all_uses_in_expr(inner, uses); - } - TypedExprKind::If { - condition, - then_branch, - else_branch, - } => { - collect_all_uses_in_expr(condition, uses); - collect_all_uses_in_expr(then_branch, uses); - collect_all_uses_in_expr(else_branch, uses); - } - TypedExprKind::Lambda(inner) => { - collect_all_uses_in_expr(inner, uses); - } - TypedExprKind::LambdaInner { captures, body, .. } => { - for (name, _) in captures { - uses.insert(name.clone()); - } - for s in body { - collect_all_uses_in_stmt(s, uses); - } - } - TypedExprKind::Member { object, .. } => { - collect_all_uses_in_expr(object, uses); - } - TypedExprKind::ArrayLiteral { elements, .. } - | TypedExprKind::VecLiteral { elements, .. } => { - for elem in elements { - collect_all_uses_in_expr(elem, uses); - } - } - TypedExprKind::ArraySized { size, .. } => { - collect_all_uses_in_expr(size, uses); - } - TypedExprKind::Index { object, index } => { - collect_all_uses_in_expr(object, uses); - collect_all_uses_in_expr(index, uses); - } - TypedExprKind::IndexAssign { - object, - index, - value, - } => { - collect_all_uses_in_expr(object, uses); - collect_all_uses_in_expr(index, uses); - collect_all_uses_in_expr(value, uses); - } - TypedExprKind::Range { start, end, .. 
} => { - if let Some(s) = start { - collect_all_uses_in_expr(s, uses); - } - if let Some(e) = end { - collect_all_uses_in_expr(e, uses); - } - } - TypedExprKind::Slice { object, range } => { - collect_all_uses_in_expr(object, uses); - collect_all_uses_in_expr(range, uses); - } - TypedExprKind::FmtString(parts) => { - for part in parts { - if let aelys_sema::TypedFmtStringPart::Expr(e) = part { - collect_all_uses_in_expr(e, uses); - } - } - } - TypedExprKind::StructLiteral { fields, .. } => { - for (_, value) in fields { - collect_all_uses_in_expr(value, uses); - } - } - TypedExprKind::Cast { expr, .. } => { - collect_all_uses_in_expr(expr, uses); - } - TypedExprKind::Int(_) - | TypedExprKind::Float(_) - | TypedExprKind::Bool(_) - | TypedExprKind::String(_) - | TypedExprKind::Null => {} - } -} diff --git a/backend/src/compiler/liveness/liveness.rs b/backend/src/compiler/liveness/liveness.rs deleted file mode 100644 index 623941e..0000000 --- a/backend/src/compiler/liveness/liveness.rs +++ /dev/null @@ -1,54 +0,0 @@ -use super::{ControlFlowGraph, LivenessAnalysis}; -use std::collections::HashSet; - -pub(super) fn compute_liveness(analysis: &mut LivenessAnalysis, cfg: &ControlFlowGraph) { - for block in &cfg.blocks { - analysis.live_in.insert(block.id, HashSet::new()); - analysis.live_out.insert(block.id, HashSet::new()); - } - - let mut changed = true; - while changed { - changed = false; - - for block_id in (0..cfg.blocks.len()).rev() { - let block = &cfg.blocks[block_id]; - - let mut new_out: HashSet = HashSet::new(); - for &succ_id in &block.successors { - if let Some(succ_in) = analysis.live_in.get(&succ_id) { - new_out.extend(succ_in.iter().cloned()); - } - } - - let old_out = analysis - .live_out - .get(&block_id) - .cloned() - .unwrap_or_default(); - if new_out != old_out { - changed = true; - analysis.live_out.insert(block_id, new_out.clone()); - } - - let mut new_in = new_out.clone(); - - for &stmt_idx in block.stmts.iter().rev() { - if let Some(defs) = 
analysis.def.get(&stmt_idx) { - for def in defs { - new_in.remove(def); - } - } - if let Some(uses) = analysis.use_set.get(&stmt_idx) { - new_in.extend(uses.iter().cloned()); - } - } - - let old_in = analysis.live_in.get(&block_id).cloned().unwrap_or_default(); - if new_in != old_in { - changed = true; - analysis.live_in.insert(block_id, new_in); - } - } - } -} diff --git a/backend/src/compiler/liveness/mod.rs b/backend/src/compiler/liveness/mod.rs deleted file mode 100644 index 70d6a13..0000000 --- a/backend/src/compiler/liveness/mod.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -// FIXME: liveness analysis is pretty basic, doesn't handle all control flow - -mod analysis; -mod cfg; -mod def_use; -mod last_use; -#[allow(clippy::module_inception)] -mod liveness; - -#[derive(Debug, Clone, Default)] -pub struct BasicBlock { - pub id: usize, - pub stmts: Vec, - pub successors: Vec, - pub predecessors: Vec, -} - -#[derive(Debug, Clone)] -pub struct ControlFlowGraph { - pub blocks: Vec, - pub stmt_to_block: HashMap, - pub entry: usize, - pub exits: Vec, -} - -#[derive(Debug, Clone, Default)] -pub struct LivenessAnalysis { - pub live_in: HashMap>, - pub live_out: HashMap>, - pub def: HashMap>, - pub use_set: HashMap>, - pub last_use_point: HashMap, - pub captured_vars: HashSet, -} - -impl LivenessAnalysis { - pub fn is_dead_after(&self, var_name: &str, stmt_idx: usize) -> bool { - if self.captured_vars.contains(var_name) { - return false; - } - - if let Some(&last_use) = self.last_use_point.get(var_name) { - return stmt_idx >= last_use; - } - - true - } - - pub fn get_dead_vars_after( - &self, - stmt_idx: usize, - defined_vars: &HashSet, - ) -> Vec { - defined_vars - .iter() - .filter(|var| self.is_dead_after(var, stmt_idx)) - .cloned() - .collect() - } -} diff --git a/backend/src/compiler/locals.rs b/backend/src/compiler/locals.rs deleted file mode 100644 index 3a6ddd7..0000000 --- a/backend/src/compiler/locals.rs +++ /dev/null @@ -1,21 
+0,0 @@ -use super::{Compiler, Local}; - -impl Compiler { - pub fn add_local( - &mut self, - name: String, - mutable: bool, - register: u8, - resolved_type: aelys_sema::ResolvedType, - ) { - self.locals.push(Local { - name, - depth: self.scope_depth, - mutable, - register, - is_captured: false, - resolved_type, - is_freed: false, - }); - } -} diff --git a/backend/src/compiler/loops/for_loop.rs b/backend/src/compiler/loops/for_loop.rs deleted file mode 100644 index 3465ab3..0000000 --- a/backend/src/compiler/loops/for_loop.rs +++ /dev/null @@ -1,146 +0,0 @@ -use super::super::{Compiler, LoopContext}; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, ExprKind, Stmt}; - -impl Compiler { - // Inspired by Lua's FORPREP/FORLOOP super-instructions - // The trick: subtract step BEFORE the loop starts, then ForLoopI always adds it back - // This way we don't need separate "first iteration" logic - // - // Structure: - // 1. iter = iter - step (compensate for ForLoopI's increment) - // 2. Jump forward to ForLoopI - // 3. - // 4. ForLoopI: iter += step, check condition, jump back to body if true - #[allow(clippy::too_many_arguments)] - pub fn compile_for( - &mut self, - iterator: &str, - start: &Expr, - end: &Expr, - inclusive: bool, - step: Option<&Expr>, - body: &Stmt, - span: Span, - ) -> Result<()> { - // Try to figure out loop direction at compile time. - // If we can't (e.g., `for i in a..b` where a,b are variables), we emit - // runtime detection code which is slower but necessary. 
- let compile_time_direction: Option = if let Some(step_expr) = step { - if let ExprKind::Int(step_val) = &step_expr.kind { - Some(*step_val > 0) - } else { - None // step is a variable, can't know at compile time - } - } else { - match (&start.kind, &end.kind) { - (ExprKind::Int(start_val), ExprKind::Int(end_val)) => Some(start_val <= end_val), - _ => None, - } - }; - - self.begin_scope(); - - // Allocate 3 consecutive registers for iter, end, step. - // ForLoopI requires these to be adjacent in the register window. - let iter_reg = self.alloc_consecutive_registers_for_call(3, span)?; - let end_reg = iter_reg + 1; - let step_reg = iter_reg + 2; - - self.register_pool[iter_reg as usize] = true; - self.register_pool[end_reg as usize] = true; - self.register_pool[step_reg as usize] = true; - self.next_register = self.next_register.max(step_reg + 1); - - self.add_local( - iterator.to_string(), - false, - iter_reg, - aelys_sema::ResolvedType::Dynamic, - ); - self.loop_variables.push(iterator.to_string()); - - self.compile_expr(start, iter_reg)?; - self.compile_expr(end, end_reg)?; - - if let Some(step_expr) = step { - self.compile_expr(step_expr, step_reg)?; - } else if let Some(direction) = compile_time_direction { - let step_val = if direction { 1i16 } else { -1i16 }; - self.emit_b(OpCode::LoadI, step_reg, step_val, span); - } else { - // Runtime direction detection - only used when we can't determine - // direction at compile time. Costs a few extra instructions but - // beats having the user specify step explicitly. 
- let temp_reg = self.alloc_register()?; - - // if start > end: step = -1, else step = 1 - self.emit_a(OpCode::Gt, temp_reg, iter_reg, end_reg, span); - - let jump_to_pos = self.emit_jump_if(OpCode::JumpIfNot, temp_reg, span); - self.emit_b(OpCode::LoadI, step_reg, -1, span); - let jump_past = self.emit_jump(OpCode::Jump, span); - - self.patch_jump(jump_to_pos); - self.emit_b(OpCode::LoadI, step_reg, 1, span); - - self.patch_jump(jump_past); - self.free_register(temp_reg); - } - - self.emit_a(OpCode::Sub, iter_reg, iter_reg, step_reg, span); - let jump_to_forloop = self.emit_jump(OpCode::Jump, span); - - let body_start = self.current_offset(); - - self.loop_stack.push(LoopContext { - start: body_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: true, - }); - - self.compile_stmt(body)?; - - let forloop_pos = self.current_offset(); - - self.patch_jump(jump_to_forloop); - - // Patch continue statements to jump to ForLoopI instead of past the loop. - // This is slightly ugly - we're manually reconstructing the instruction encoding. - // TODO: maybe add a helper for patching just the offset part of a jump? 
- if let Some(ctx) = self.loop_stack.last() { - for &continue_jump in &ctx.continue_jumps { - let dist = (forloop_pos - continue_jump - 1) as i16; - let instr = self.current.bytecode_at(continue_jump); - let op = instr >> 24; - let a = (instr >> 16) & 0xFF; - *self.current.bytecode_mut(continue_jump) = - (op << 24) | (a << 16) | ((dist as u16) as u32); - } - } - - let loop_opcode = if inclusive { - OpCode::ForLoopIInc - } else { - OpCode::ForLoopI - }; - let jump_back_dist = -((forloop_pos - body_start + 1) as i16); - self.emit_b(loop_opcode, iter_reg, jump_back_dist, span); - - if let Some(loop_ctx) = self.loop_stack.pop() { - for break_jump in loop_ctx.break_jumps { - self.patch_jump(break_jump); - } - } - - self.free_register(step_reg); - self.free_register(end_reg); - self.loop_variables.pop(); - self.end_scope(); - - Ok(()) - } -} diff --git a/backend/src/compiler/loops/mod.rs b/backend/src/compiler/loops/mod.rs deleted file mode 100644 index 8ac6bc6..0000000 --- a/backend/src/compiler/loops/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod for_loop; -mod while_loop; diff --git a/backend/src/compiler/loops/while_loop.rs b/backend/src/compiler/loops/while_loop.rs deleted file mode 100644 index 2a74672..0000000 --- a/backend/src/compiler/loops/while_loop.rs +++ /dev/null @@ -1,115 +0,0 @@ -use super::super::{Compiler, LoopContext}; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::ast::{BinaryOp, Expr, ExprKind, Stmt}; - -impl Compiler { - pub fn compile_while(&mut self, condition: &Expr, body: &Stmt) -> Result<()> { - // Fast path: `while i < bound` is super common, use specialized opcode - if let Some(optimized) = self.try_compile_while_loop_lt(condition, body)? { - return Ok(optimized); - } - self.compile_while_generic(condition, body) - } - - // WhileLoopLt: fuses comparison + conditional jump into one instruction. - // Only works for `while local_var < expr` pattern - anything else falls back to generic. 
- pub fn try_compile_while_loop_lt( - &mut self, - condition: &Expr, - body: &Stmt, - ) -> Result> { - if let ExprKind::Binary { - op: BinaryOp::Lt, - left, - right, - } = &condition.kind - && let ExprKind::Identifier(name) = &left.kind - && let Some((iter_reg, _)) = self.resolve_variable(name) - { - // Need the bound in the register right after the iterator. - // WhileLoopLt expects them adjacent. If that register is taken, bail. - // FIXME: could spill/move but probably not worth the complexity - let bound_reg = match iter_reg.checked_add(1) { - Some(r) if (r as usize) < self.register_pool.len() => r, - _ => return Ok(None), - }; - - if self.register_pool[bound_reg as usize] { - return Ok(None); - } - - self.register_pool[bound_reg as usize] = true; - if bound_reg >= self.next_register { - self.next_register = bound_reg + 1; - } - - self.compile_expr(right, bound_reg)?; - - let loop_start = self.current_offset(); - - self.loop_stack.push(LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: false, - }); - - self.emit_b(OpCode::WhileLoopLt, iter_reg, 1, condition.span); - let jump_to_end = self.emit_jump(OpCode::Jump, condition.span); - - self.compile_stmt(body)?; - - let jump_dist = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(OpCode::Jump, 0, -jump_dist, body.span); - - self.patch_jump(jump_to_end); - - if let Some(loop_ctx) = self.loop_stack.pop() { - for break_jump in loop_ctx.break_jumps { - self.patch_jump(break_jump); - } - } - - self.register_pool[bound_reg as usize] = false; - - return Ok(Some(())); - } - - Ok(None) - } - - // Fallback for arbitrary conditions. Less efficient than WhileLoopLt - // but handles everything: `while a && b`, `while func()`, etc. 
- pub fn compile_while_generic(&mut self, condition: &Expr, body: &Stmt) -> Result<()> { - let loop_start = self.current_offset(); - - self.loop_stack.push(LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: false, - }); - - let cond_reg = self.alloc_register()?; - self.compile_expr(condition, cond_reg)?; - - let jump_to_end = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - self.free_register(cond_reg); - - self.compile_stmt(body)?; - - let jump_dist = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(OpCode::Jump, 0, -jump_dist, body.span); - - self.patch_jump(jump_to_end); - - if let Some(loop_ctx) = self.loop_stack.pop() { - for break_jump in loop_ctx.break_jumps { - self.patch_jump(break_jump); - } - } - - Ok(()) - } -} diff --git a/backend/src/compiler/mod.rs b/backend/src/compiler/mod.rs deleted file mode 100644 index 275719b..0000000 --- a/backend/src/compiler/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -mod builtins; -mod call; -mod constructors; -mod emit; -mod expr; -mod functions; -mod globals; -mod lambda; -pub mod liveness; -mod locals; -mod loops; -mod pipeline; -mod scope; -mod state; -mod stmt; - -pub use state::{Compiler, Local, LoopContext, Scope, Upvalue}; diff --git a/backend/src/compiler/pipeline.rs b/backend/src/compiler/pipeline.rs deleted file mode 100644 index 709cb68..0000000 --- a/backend/src/compiler/pipeline.rs +++ /dev/null @@ -1,106 +0,0 @@ -use super::Compiler; -use aelys_bytecode::{Function, GlobalLayout, Heap, OpCode}; -use aelys_common::Result; -use aelys_sema::{TypedProgram, TypedStmtKind}; -use std::collections::HashMap; -use std::sync::Arc; - -impl Compiler { - pub fn compile_typed( - mut self, - program: &TypedProgram, - ) -> Result<(Function, Heap, HashMap)> { - for stmt in &program.stmts { - match &stmt.kind { - TypedStmtKind::Function(func) => { - self.globals.insert(func.name.clone(), false); - if !self.global_indices.contains_key(&func.name) { 
- let idx = self.next_global_index; - self.global_indices.insert(func.name.clone(), idx); - self.next_global_index += 1; - } - } - TypedStmtKind::Let { name, mutable, .. } => { - self.globals.insert(name.clone(), *mutable); - if !self.global_indices.contains_key(name) { - let idx = self.next_global_index; - self.global_indices.insert(name.clone(), idx); - self.next_global_index += 1; - } - } - _ => {} - } - } - - if program.stmts.is_empty() { - self.emit_return0(aelys_syntax::Span::dummy()); - } else { - let last_idx = program.stmts.len() - 1; - - for stmt in &program.stmts[..last_idx] { - self.compile_typed_stmt(stmt)?; - } - - let last_stmt = &program.stmts[last_idx]; - match &last_stmt.kind { - TypedStmtKind::Expression(expr) => { - let result_reg = self.alloc_register()?; - self.compile_typed_expr(expr, result_reg)?; - self.emit_a(OpCode::Return, result_reg, 0, 0, last_stmt.span); - } - // If with else branch can return a value - TypedStmtKind::If { - condition, - then_branch, - else_branch: Some(else_branch), - } => { - let result_reg = self.alloc_register()?; - let cond_reg = self.alloc_register()?; - self.compile_typed_expr(condition, cond_reg)?; - let else_jump = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - self.free_register(cond_reg); - - self.compile_typed_if_branch_for_return(then_branch, result_reg)?; - let end_jump = self.emit_jump(OpCode::Jump, then_branch.span); - self.patch_jump(else_jump); - self.compile_typed_if_branch_for_return(else_branch, result_reg)?; - self.patch_jump(end_jump); - - self.emit_a(OpCode::Return, result_reg, 0, 0, last_stmt.span); - } - // Block can return value of its last expression - TypedStmtKind::Block(_) => { - let result_reg = self.alloc_register()?; - self.compile_typed_if_branch_for_return(last_stmt, result_reg)?; - self.emit_a(OpCode::Return, result_reg, 0, 0, last_stmt.span); - } - _ => { - self.compile_typed_stmt(last_stmt)?; - self.emit_return0(last_stmt.span); - } - } - } - - 
self.current.num_registers = self.next_register; - self.current.call_site_count = self.next_call_site_slot; - self.current.global_layout = self.build_global_layout(); - self.current.compute_global_layout_hash(); - self.current.finalize_bytecode(); - - Ok((self.current, self.heap, self.globals)) - } - - pub(super) fn build_global_layout(&self) -> Arc { - if self.accessed_globals.is_empty() { - GlobalLayout::empty() - } else { - let mut names = vec![String::new(); self.next_global_index as usize]; - for (name, &idx) in &self.global_indices { - if self.accessed_globals.contains(name) { - names[idx as usize] = name.clone(); - } - } - GlobalLayout::new(names) - } - } -} diff --git a/backend/src/compiler/scope/liveness.rs b/backend/src/compiler/scope/liveness.rs deleted file mode 100644 index 99684e3..0000000 --- a/backend/src/compiler/scope/liveness.rs +++ /dev/null @@ -1,30 +0,0 @@ -use super::super::Compiler; -use std::collections::HashSet; - -impl Compiler { - pub fn free_dead_locals( - &mut self, - stmt_idx: usize, - liveness: &super::super::liveness::LivenessAnalysis, - already_freed: &mut HashSet, - ) -> usize { - let mut freed = 0; - for local in &mut self.locals { - if local.is_freed || already_freed.contains(&local.name) { - continue; - } - - if local.is_captured { - continue; - } - - if liveness.is_dead_after(&local.name, stmt_idx) { - self.register_pool[local.register as usize] = false; - local.is_freed = true; - already_freed.insert(local.name.clone()); - freed += 1; - } - } - freed - } -} diff --git a/backend/src/compiler/scope/locals.rs b/backend/src/compiler/scope/locals.rs deleted file mode 100644 index bdb99fc..0000000 --- a/backend/src/compiler/scope/locals.rs +++ /dev/null @@ -1,51 +0,0 @@ -use super::super::{Compiler, Local}; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -impl Compiler { - pub fn declare_variable(&mut self, name: &str, mutable: bool) -> Result { - for local in 
self.locals.iter().rev() { - if local.depth < self.scope_depth { - break; - } - if local.name == name { - return Err(CompileError::new( - CompileErrorKind::VariableAlreadyDefined(name.to_string()), - Span::dummy(), - self.source.clone(), - ) - .into()); - } - } - - let register = self.alloc_register()?; - - self.locals.push(Local { - name: name.to_string(), - depth: self.scope_depth, - mutable, - register, - is_captured: false, - resolved_type: aelys_sema::ResolvedType::Dynamic, - is_freed: false, - }); - - Ok(register) - } - - pub fn mark_local_captured(&mut self, register: u8) { - for local in self.locals.iter_mut() { - if local.register == register { - local.is_captured = true; - break; - } - } - - if let Some(scope) = self.scopes.last_mut() - && !scope.captured_registers.contains(®ister) - { - scope.captured_registers.push(register); - } - } -} diff --git a/backend/src/compiler/scope/mod.rs b/backend/src/compiler/scope/mod.rs deleted file mode 100644 index be8a6a0..0000000 --- a/backend/src/compiler/scope/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod liveness; -mod locals; -mod registers; -mod resolve; -mod scopes; -mod types; diff --git a/backend/src/compiler/scope/registers.rs b/backend/src/compiler/scope/registers.rs deleted file mode 100644 index 0570113..0000000 --- a/backend/src/compiler/scope/registers.rs +++ /dev/null @@ -1,85 +0,0 @@ -use super::super::Compiler; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -impl Compiler { - pub fn alloc_register(&mut self) -> Result { - for (i, used) in self.register_pool.iter_mut().enumerate() { - if !*used { - *used = true; - self.next_register = self.next_register.max(i as u8 + 1); - return Ok(i as u8); - } - } - Err(CompileError::new( - CompileErrorKind::TooManyRegisters, - Span::dummy(), - self.source.clone(), - ) - .into()) - } - - pub fn free_register(&mut self, reg: u8) { - self.register_pool[reg as usize] = false; - } - - // contiguous block for 
call args - pub fn alloc_consecutive_registers_for_call(&self, n: u8, span: Span) -> Result { - let n = n as usize; - let pool_len = self.register_pool.len(); - - 'outer: for start in 0..pool_len { - if start + n > pool_len { - break; - } - for offset in 0..n { - if self.register_pool[start + offset] { - continue 'outer; - } - } - return Ok(start as u8); - } - - Err(CompileError::new( - CompileErrorKind::TooManyRegisters, - span, - self.source.clone(), - ) - .into()) - } - - pub fn alloc_consecutive_from(&mut self, start: u8, count: u8) -> Result { - let start_usize = start as usize; - let count_usize = count as usize; - let end_usize = start_usize + count_usize; - - if end_usize > 256 { - return Err(CompileError::new( - CompileErrorKind::TooManyRegisters, - Span::dummy(), - self.source.clone(), - ) - .into()); - } - - // Safety check: verify none of the registers are already in use - for i in start_usize..end_usize { - if self.register_pool[i] { - return Err(CompileError::new( - CompileErrorKind::TooManyRegisters, - Span::dummy(), - self.source.clone(), - ) - .into()); - } - } - - for i in start_usize..end_usize { - self.register_pool[i] = true; - } - - self.next_register = self.next_register.max(end_usize as u8); - Ok(start) - } -} diff --git a/backend/src/compiler/scope/resolve.rs b/backend/src/compiler/scope/resolve.rs deleted file mode 100644 index 2028338..0000000 --- a/backend/src/compiler/scope/resolve.rs +++ /dev/null @@ -1,90 +0,0 @@ -use super::super::{Compiler, Upvalue}; - -// TODO: this clone dance for enclosing_locals is ugly, maybe Rc -impl Compiler { - pub fn resolve_variable_typed( - &self, - name: &str, - ) -> Option<(u8, bool, &aelys_sema::ResolvedType)> { - for local in self.locals.iter().rev() { - if local.is_freed { - continue; - } - if local.name == name { - return Some((local.register, local.mutable, &local.resolved_type)); - } - } - None - } - - pub fn resolve_variable(&self, name: &str) -> Option<(u8, bool)> { - for local in 
self.locals.iter().rev() { - if local.is_freed { - continue; - } - if local.name == name { - return Some((local.register, local.mutable)); - } - } - None - } - - pub fn resolve_upvalue(&mut self, name: &str) -> Option<(u8, bool)> { - for (i, upvalue) in self.upvalues.iter().enumerate() { - if upvalue.name == name { - return Some((i as u8, upvalue.mutable)); - } - } - - if let Some(ref mut enclosing_locals) = self.enclosing_locals.clone() { - for (i, local) in enclosing_locals.iter().enumerate() { - if local.name == name { - if let Some(ref mut locals) = self.enclosing_locals { - locals[i].is_captured = true; - } - - let upvalue_index = self.upvalues.len() as u8; - self.upvalues.push(Upvalue { - is_local: true, - index: local.register, - name: name.to_string(), - mutable: local.mutable, - }); - return Some((upvalue_index, local.mutable)); - } - } - } - - if let Some(ref enclosing_upvalues) = self.enclosing_upvalues.clone() { - for (i, upvalue) in enclosing_upvalues.iter().enumerate() { - if upvalue.name == name { - let upvalue_index = self.upvalues.len() as u8; - self.upvalues.push(Upvalue { - is_local: false, - index: i as u8, - name: name.to_string(), - mutable: upvalue.mutable, - }); - return Some((upvalue_index, upvalue.mutable)); - } - } - } - - for (depth, ancestor_locals) in self.all_enclosing_locals.iter().enumerate().skip(1) { - for local in ancestor_locals.iter() { - if local.name == name { - let upvalue_index = self.upvalues.len() as u8; - self.upvalues.push(Upvalue { - is_local: false, - index: (depth - 1) as u8 | 0x80, - name: name.to_string(), - mutable: local.mutable, - }); - return Some((upvalue_index, local.mutable)); - } - } - } - - None - } -} diff --git a/backend/src/compiler/scope/scopes.rs b/backend/src/compiler/scope/scopes.rs deleted file mode 100644 index b304311..0000000 --- a/backend/src/compiler/scope/scopes.rs +++ /dev/null @@ -1,31 +0,0 @@ -use super::super::{Compiler, Scope}; -use aelys_bytecode::OpCode; - -impl Compiler { - pub fn 
begin_scope(&mut self) { - self.scope_depth += 1; - self.scopes.push(Scope { - start: self.locals.len(), - captured_registers: Vec::new(), - }); - } - - // End scope: close upvalues for captured variables, free registers. - // Note: we only emit CloseUpvals if something was actually captured. - // Earlier versions emitted it unconditionally which was wasteful - pub fn end_scope(&mut self) { - self.scope_depth = self.scope_depth.saturating_sub(1); - - if let Some(scope) = self.scopes.pop() { - // Only need to close upvalues if any locals were captured - if let Some(&lowest_captured) = scope.captured_registers.iter().min() { - self.current - .emit_a(OpCode::CloseUpvals, lowest_captured, 0, 0, 0); - } - - for local in self.locals.drain(scope.start..) { - self.register_pool[local.register as usize] = false; - } - } - } -} diff --git a/backend/src/compiler/scope/types.rs b/backend/src/compiler/scope/types.rs deleted file mode 100644 index 31108bf..0000000 --- a/backend/src/compiler/scope/types.rs +++ /dev/null @@ -1,9 +0,0 @@ -use super::super::Compiler; - -impl Compiler { - // deprecated - types tracked via TypedExpr now - pub fn get_register_type(&self, _reg: u8) -> aelys_sema::ResolvedType { - aelys_sema::ResolvedType::Dynamic - } - pub fn set_register_type(&mut self, _reg: u8, _typ: aelys_sema::ResolvedType) {} -} diff --git a/backend/src/compiler/state.rs b/backend/src/compiler/state.rs deleted file mode 100644 index 9c240e7..0000000 --- a/backend/src/compiler/state.rs +++ /dev/null @@ -1,67 +0,0 @@ -use aelys_bytecode::{Function, Heap}; -use aelys_sema::ResolvedType; -use aelys_syntax::Source; -use std::collections::{HashMap, HashSet}; -use std::rc::Rc; -use std::sync::Arc; - -#[derive(Debug, Clone)] -pub struct Local { - pub name: String, - pub depth: usize, - pub mutable: bool, - pub register: u8, - pub is_captured: bool, // closure capture - pub resolved_type: ResolvedType, - pub is_freed: bool, // liveness freed this reg -} - -#[derive(Debug, Clone)] -pub 
struct Upvalue { - pub is_local: bool, // true = from enclosing locals, false = from enclosing upvalues - pub index: u8, // reg if is_local, upvalue idx otherwise - pub name: String, - pub mutable: bool, -} - -#[derive(Debug, Clone)] -pub struct Scope { - pub start: usize, - pub captured_registers: Vec, // for CloseUpvals -} - -#[derive(Debug, Clone)] -pub struct LoopContext { - pub start: usize, - pub break_jumps: Vec, - pub continue_jumps: Vec, // for-loop: forward patch to increment - pub is_for_loop: bool, // for: continue forward, while: continue back -} - -pub struct Compiler { - pub current: Function, - pub source: Arc, - pub scopes: Vec, - pub locals: Vec, - pub upvalues: Vec, - pub enclosing_locals: Option>, - pub enclosing_upvalues: Option>, - pub all_enclosing_locals: Vec>, // transitive capture chain - pub loop_stack: Vec, - pub loop_variables: Vec, // can't assign inside loop body - pub scope_depth: usize, - pub next_register: u8, - pub has_no_gc: bool, - pub heap: Heap, - pub(crate) register_pool: [bool; 256], - pub globals: HashMap, // name -> mutable - pub global_indices: HashMap, - pub next_global_index: u16, - pub module_aliases: Rc>, - pub known_globals: Rc>, - pub known_native_globals: Rc>, - pub symbol_origins: Rc>, // bare name -> qualified name - pub accessed_globals: HashSet, - pub next_call_site_slot: u16, - pub function_depth: usize, -} diff --git a/backend/src/compiler/stmt/control_flow.rs b/backend/src/compiler/stmt/control_flow.rs deleted file mode 100644 index 06d4c13..0000000 --- a/backend/src/compiler/stmt/control_flow.rs +++ /dev/null @@ -1,175 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::Span; - -impl Compiler { - pub fn compile_break(&mut self, span: Span) -> Result<()> { - if self.loop_stack.is_empty() { - return Err(CompileError::new( - CompileErrorKind::BreakOutsideLoop, - span, - self.source.clone(), - ) - 
.into()); - } - - let jump_offset = self.emit_jump(OpCode::Jump, span); - if let Some(loop_ctx) = self.loop_stack.last_mut() { - loop_ctx.break_jumps.push(jump_offset); - } else { - return Err(CompileError::new( - CompileErrorKind::BreakOutsideLoop, - span, - self.source.clone(), - ) - .into()); - } - - Ok(()) - } - - pub fn compile_continue(&mut self, span: Span) -> Result<()> { - if let Some(loop_ctx) = self.loop_stack.last() { - let is_for_loop = loop_ctx.is_for_loop; - let loop_start = loop_ctx.start; - - if is_for_loop { - let jump_offset = self.emit_jump(OpCode::Jump, span); - if let Some(loop_ctx) = self.loop_stack.last_mut() { - loop_ctx.continue_jumps.push(jump_offset); - } else { - return Err(CompileError::new( - CompileErrorKind::ContinueOutsideLoop, - span, - self.source.clone(), - ) - .into()); - } - } else { - let jump_dist = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(OpCode::Jump, 0, -jump_dist, span); - } - Ok(()) - } else { - Err(CompileError::new( - CompileErrorKind::ContinueOutsideLoop, - span, - self.source.clone(), - ) - .into()) - } - } - - pub fn compile_return( - &mut self, - expr: Option<&aelys_syntax::ast::Expr>, - span: Span, - ) -> Result<()> { - if self.function_depth == 0 { - return Err(CompileError::new( - CompileErrorKind::ReturnOutsideFunction, - span, - self.source.clone(), - ) - .into()); - } - - if self.has_no_gc { - let line = self.current_line(span); - self.current.emit_a(OpCode::ExitNoGc, 0, 0, 0, line); - } - - if let Some(expr) = expr { - let reg = self.alloc_register()?; - self.compile_expr(expr, reg)?; - - let lowest_captured = self - .locals - .iter() - .filter(|l| l.is_captured) - .map(|l| l.register) - .min(); - - if let Some(from_reg) = lowest_captured { - let line = self.current_line(span); - self.current - .emit_a(OpCode::CloseUpvals, from_reg, 0, 0, line); - } - - let line = self.current_line(span); - self.current.emit_a(OpCode::Return, reg, 0, 0, line); - self.free_register(reg); - } else { - 
let lowest_captured = self - .locals - .iter() - .filter(|l| l.is_captured) - .map(|l| l.register) - .min(); - - if let Some(from_reg) = lowest_captured { - let line = self.current_line(span); - self.current - .emit_a(OpCode::CloseUpvals, from_reg, 0, 0, line); - } - - self.emit_return0(span); - } - Ok(()) - } - - pub fn compile_typed_return( - &mut self, - expr: Option<&aelys_sema::TypedExpr>, - span: Span, - ) -> Result<()> { - if self.function_depth == 0 { - return Err(CompileError::new( - CompileErrorKind::ReturnOutsideFunction, - span, - self.source.clone(), - ) - .into()); - } - - if self.has_no_gc { - self.emit_a(OpCode::ExitNoGc, 0, 0, 0, span); - } - - if let Some(e) = expr { - let reg = self.alloc_register()?; - self.compile_typed_expr(e, reg)?; - - let lowest_captured = self - .locals - .iter() - .filter(|l| l.is_captured) - .map(|l| l.register) - .min(); - - if let Some(from_reg) = lowest_captured { - self.emit_a(OpCode::CloseUpvals, from_reg, 0, 0, span); - } - - self.emit_a(OpCode::Return, reg, 0, 0, span); - self.free_register(reg); - } else { - let lowest_captured = self - .locals - .iter() - .filter(|l| l.is_captured) - .map(|l| l.register) - .min(); - - if let Some(from_reg) = lowest_captured { - self.emit_a(OpCode::CloseUpvals, from_reg, 0, 0, span); - } - - self.emit_a(OpCode::Return0, 0, 0, 0, span); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/stmt/control_if.rs b/backend/src/compiler/stmt/control_if.rs deleted file mode 100644 index a363b4a..0000000 --- a/backend/src/compiler/stmt/control_if.rs +++ /dev/null @@ -1,173 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Stmt, StmtKind}; - -impl Compiler { - pub fn compile_if_branch_for_return(&mut self, branch: &Stmt, dest: u8) -> Result<()> { - match &branch.kind { - StmtKind::Block(stmts) => { - self.begin_scope(); - if stmts.is_empty() { - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - 
} else { - for stmt in &stmts[..stmts.len() - 1] { - self.compile_stmt(stmt)?; - } - let last = &stmts[stmts.len() - 1]; - match &last.kind { - StmtKind::Expression(expr) => { - self.compile_expr(expr, dest)?; - } - StmtKind::If { - condition, - then_branch, - else_branch, - } if else_branch.is_some() => { - let cond_reg = self.alloc_register()?; - self.compile_expr(condition, cond_reg)?; - let jump_to_else = - self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - self.free_register(cond_reg); - - self.compile_if_branch_for_return(then_branch, dest)?; - let jump_to_end = self.emit_jump(OpCode::Jump, then_branch.span); - self.patch_jump(jump_to_else); - - if let Some(else_branch) = else_branch.as_ref() { - self.compile_if_branch_for_return(else_branch, dest)?; - } - self.patch_jump(jump_to_end); - } - _ => { - self.compile_stmt(last)?; - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - } - } - } - self.end_scope(); - } - StmtKind::Expression(expr) => { - self.compile_expr(expr, dest)?; - } - _ => { - self.compile_stmt(branch)?; - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - } - } - Ok(()) - } - - pub fn compile_typed_if_branch_for_return( - &mut self, - branch: &aelys_sema::TypedStmt, - dest: u8, - ) -> Result<()> { - use aelys_sema::TypedStmtKind; - - match &branch.kind { - TypedStmtKind::Block(stmts) => { - self.begin_scope(); - if stmts.is_empty() { - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - } else { - for stmt in &stmts[..stmts.len() - 1] { - self.compile_typed_stmt(stmt)?; - } - let last = &stmts[stmts.len() - 1]; - match &last.kind { - TypedStmtKind::Expression(expr) => { - self.compile_typed_expr(expr, dest)?; - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } if else_branch.is_some() => { - let cond_reg = self.alloc_register()?; - self.compile_typed_expr(condition, cond_reg)?; - let jump_to_else = - self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - 
self.free_register(cond_reg); - - self.compile_typed_if_branch_for_return(then_branch, dest)?; - let jump_to_end = self.emit_jump(OpCode::Jump, then_branch.span); - self.patch_jump(jump_to_else); - - if let Some(else_branch) = else_branch.as_ref() { - self.compile_typed_if_branch_for_return(else_branch, dest)?; - } - self.patch_jump(jump_to_end); - } - _ => { - self.compile_typed_stmt(last)?; - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - } - } - } - self.end_scope(); - } - TypedStmtKind::Expression(expr) => { - self.compile_typed_expr(expr, dest)?; - } - _ => { - self.compile_typed_stmt(branch)?; - self.emit_a(OpCode::LoadNull, dest, 0, 0, branch.span); - } - } - Ok(()) - } - - pub fn compile_if( - &mut self, - condition: &aelys_syntax::ast::Expr, - then_branch: &Stmt, - else_branch: Option<&Stmt>, - ) -> Result<()> { - let cond_reg = self.alloc_register()?; - self.compile_expr(condition, cond_reg)?; - let jump_to_else = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, condition.span); - - self.free_register(cond_reg); - self.compile_stmt(then_branch)?; - - if let Some(else_branch) = else_branch { - let jump_to_end = self.emit_jump(OpCode::Jump, then_branch.span); - self.patch_jump(jump_to_else); - self.compile_stmt(else_branch)?; - self.patch_jump(jump_to_end); - } else { - self.patch_jump(jump_to_else); - } - - Ok(()) - } - - pub fn compile_typed_if_stmt( - &mut self, - condition: &aelys_sema::TypedExpr, - then_branch: &aelys_sema::TypedStmt, - else_branch: Option<&aelys_sema::TypedStmt>, - span: Span, - ) -> Result<()> { - let cond_reg = self.alloc_register()?; - self.compile_typed_expr(condition, cond_reg)?; - - let else_jump = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, span); - self.free_register(cond_reg); - - self.compile_typed_stmt(then_branch)?; - - if let Some(else_stmt) = else_branch { - let end_jump = self.emit_jump(OpCode::Jump, span); - self.patch_jump(else_jump); - self.compile_typed_stmt(else_stmt)?; - self.patch_jump(end_jump); - } 
else { - self.patch_jump(else_jump); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/stmt/decl.rs b/backend/src/compiler/stmt/decl.rs deleted file mode 100644 index 0296734..0000000 --- a/backend/src/compiler/stmt/decl.rs +++ /dev/null @@ -1,74 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Expr, TypeAnnotation}; - -impl Compiler { - pub fn compile_let( - &mut self, - name: &str, - mutable: bool, - _type_annotation: Option<&TypeAnnotation>, - initializer: &Expr, - _is_pub: bool, - ) -> Result<()> { - if self.scope_depth == 0 { - self.globals.insert(name.to_string(), mutable); - - let global_idx = if let Some(&idx) = self.global_indices.get(name) { - idx - } else { - let idx = self.next_global_index; - self.global_indices.insert(name.to_string(), idx); - self.next_global_index += 1; - idx - }; - - let temp_reg = self.alloc_register()?; - self.compile_expr(initializer, temp_reg)?; - - self.accessed_globals.insert(name.to_string()); - self.emit_b( - OpCode::SetGlobalIdx, - temp_reg, - global_idx as i16, - Span::dummy(), - ); - self.free_register(temp_reg); - } else { - let reg = self.declare_variable(name, mutable)?; - self.compile_expr(initializer, reg)?; - } - Ok(()) - } - - pub fn compile_typed_let( - &mut self, - name: &str, - mutable: bool, - initializer: &aelys_sema::TypedExpr, - var_type: &aelys_sema::ResolvedType, - _is_pub: bool, - span: Span, - ) -> Result<()> { - if self.scope_depth == 0 { - let reg = self.alloc_register()?; - self.compile_typed_expr(initializer, reg)?; - - self.globals.insert(name.to_string(), mutable); - let idx = self.get_or_create_global_index_raw(name); - self.accessed_globals.insert(name.to_string()); - - self.emit_b(OpCode::SetGlobalIdx, reg, idx as i16, span); - self.free_register(reg); - } else { - let reg = self.alloc_register()?; - self.compile_typed_expr(initializer, reg)?; - - self.add_local(name.to_string(), mutable, reg, 
var_type.clone()); - } - - Ok(()) - } -} diff --git a/backend/src/compiler/stmt/expression.rs b/backend/src/compiler/stmt/expression.rs deleted file mode 100644 index e0b1f41..0000000 --- a/backend/src/compiler/stmt/expression.rs +++ /dev/null @@ -1,72 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_syntax::ast::{BinaryOp, Expr, ExprKind}; - -impl Compiler { - pub fn compile_expression_stmt(&mut self, expr: &Expr) -> Result<()> { - if let ExprKind::Assign { name, value } = &expr.kind - && let Some((reg, mutable)) = self.resolve_variable(name) - { - if !mutable { - return Err(CompileError::new( - CompileErrorKind::AssignToImmutable(name.to_string()), - expr.span, - self.source.clone(), - ) - .into()); - } - if self.loop_variables.contains(name) { - return Err(CompileError::new( - CompileErrorKind::AssignToLoopVariable(name.to_string()), - expr.span, - self.source.clone(), - ) - .into()); - } - if let ExprKind::Binary { - left, - op: BinaryOp::Add, - right, - } = &value.kind - { - if let (ExprKind::Identifier(id), ExprKind::Int(n)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::AddI, reg, reg, *n as u8, expr.span); - return Ok(()); - } - if let (ExprKind::Int(n), ExprKind::Identifier(id)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::AddI, reg, reg, *n as u8, expr.span); - return Ok(()); - } - } - if let ExprKind::Binary { - left, - op: BinaryOp::Sub, - right, - } = &value.kind - && let (ExprKind::Identifier(id), ExprKind::Int(n)) = (&left.kind, &right.kind) - && id == name - && *n >= 0 - && *n <= 255 - { - self.emit_a(OpCode::SubI, reg, reg, *n as u8, expr.span); - return Ok(()); - } - self.compile_expr(value, reg)?; - return Ok(()); - } - let temp = self.alloc_register()?; - self.compile_expr(expr, temp)?; - self.free_register(temp); - Ok(()) - } -} diff 
--git a/backend/src/compiler/stmt/looping.rs b/backend/src/compiler/stmt/looping.rs deleted file mode 100644 index 0dafd6c..0000000 --- a/backend/src/compiler/stmt/looping.rs +++ /dev/null @@ -1,356 +0,0 @@ -use super::Compiler; -use aelys_bytecode::OpCode; -use aelys_common::Result; -use aelys_common::error::{CompileError, CompileErrorKind}; -use aelys_sema::InferType; -use aelys_syntax::Span; - -impl Compiler { - pub fn compile_typed_while( - &mut self, - condition: &aelys_sema::TypedExpr, - body: &aelys_sema::TypedStmt, - span: Span, - ) -> Result<()> { - let loop_start = self.current_offset(); - - self.loop_stack.push(super::super::LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: false, - }); - - let cond_reg = self.alloc_register()?; - self.compile_typed_expr(condition, cond_reg)?; - - let exit_jump = self.emit_jump_if(OpCode::JumpIfNot, cond_reg, span); - self.free_register(cond_reg); - - self.compile_typed_stmt(body)?; - - let jump_dist = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(OpCode::Jump, 0, -jump_dist, span); - - self.patch_jump(exit_jump); - - let ctx = self.loop_stack.pop().ok_or_else(|| { - CompileError::new( - CompileErrorKind::BreakOutsideLoop, - span, - self.source.clone(), - ) - })?; - for jump in ctx.break_jumps { - self.patch_jump(jump); - } - - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - pub fn compile_typed_for( - &mut self, - iterator: &str, - start: &aelys_sema::TypedExpr, - end: &aelys_sema::TypedExpr, - inclusive: bool, - step: Option<&aelys_sema::TypedExpr>, - body: &aelys_sema::TypedStmt, - span: Span, - ) -> Result<()> { - self.begin_scope(); - - let iter_reg = self.alloc_consecutive_registers_for_call(3, span)?; - let end_reg = iter_reg + 1; - let step_reg = iter_reg + 2; - - self.register_pool[iter_reg as usize] = true; - self.register_pool[end_reg as usize] = true; - self.register_pool[step_reg as usize] = true; - self.next_register = 
self.next_register.max(step_reg + 1); - - self.compile_typed_expr(start, iter_reg)?; - self.compile_typed_expr(end, end_reg)?; - - if let Some(step_expr) = step { - self.compile_typed_expr(step_expr, step_reg)?; - } else { - self.emit_b(OpCode::LoadI, step_reg, 1, span); - } - - self.add_local( - iterator.to_string(), - false, - iter_reg, - aelys_sema::ResolvedType::I64, - ); - self.loop_variables.push(iterator.to_string()); - - // Subtract step from iter so that the first ForLoopI increment gives the correct start value - // This ensures that for empty ranges (like 0..0), the loop body is never executed - self.emit_a(OpCode::Sub, iter_reg, iter_reg, step_reg, span); - - // Jump to the ForLoopI check before executing the body for the first time - let jump_to_forloop = self.emit_jump(OpCode::Jump, span); - - let loop_start = self.current_offset(); - - self.loop_stack.push(super::super::LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: true, - }); - - let opcode = if inclusive { - OpCode::ForLoopIInc - } else { - OpCode::ForLoopI - }; - - self.compile_typed_stmt(body)?; - - let continue_target = self.current_offset(); - - // Patch the initial jump to point here (to ForLoopI) - self.patch_jump(jump_to_forloop); - - let offset = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(opcode, iter_reg, -offset, span); - - let ctx = self.loop_stack.pop().ok_or_else(|| { - CompileError::new( - CompileErrorKind::ContinueOutsideLoop, - span, - self.source.clone(), - ) - })?; - for jump in ctx.continue_jumps { - let offset_to_target = (continue_target as isize - jump as isize - 1) as i16; - *self.current.bytecode_mut(jump) = - (OpCode::Jump as u32) << 24 | ((offset_to_target as u32) & 0xFFFFFF); - } - - for jump in ctx.break_jumps { - self.patch_jump(jump); - } - - self.loop_variables.pop(); - self.free_register(step_reg); - self.free_register(end_reg); - self.end_scope(); - - Ok(()) - } - - pub fn 
compile_typed_for_each( - &mut self, - iterator: &str, - iterable: &aelys_sema::TypedExpr, - elem_type: &InferType, - body: &aelys_sema::TypedStmt, - span: Span, - ) -> Result<()> { - match &iterable.ty { - InferType::String => self.compile_string_for_each(iterator, iterable, body, span), - InferType::Vec(inner) => self.compile_collection_for_each( - iterator, - iterable, - inner, - body, - OpCode::VecForLoop, - span, - ), - InferType::Array(inner) => self.compile_collection_for_each( - iterator, - iterable, - inner, - body, - OpCode::ArrayForLoop, - span, - ), - InferType::Dynamic | InferType::Var(_) => { - // dynamic: default to VecForLoop (works for both at runtime via object kind) - self.compile_collection_for_each( - iterator, - iterable, - &InferType::Dynamic, - body, - OpCode::VecForLoop, - span, - ) - } - _ => Err(aelys_common::AelysError::Compile(CompileError::new( - CompileErrorKind::TypeInferenceError(format!( - "for-each over {:?} not yet supported", - elem_type - )), - span, - self.source.clone(), - ))), - } - } - - fn infer_to_resolved(ty: &InferType) -> aelys_sema::ResolvedType { - aelys_sema::ResolvedType::from_infer_type(ty) - } - - fn compile_collection_for_each( - &mut self, - iterator: &str, - iterable: &aelys_sema::TypedExpr, - inner_type: &InferType, - body: &aelys_sema::TypedStmt, - opcode: OpCode, - span: Span, - ) -> Result<()> { - self.begin_scope(); - - // Allocate 3 consecutive registers: [element, index, collection_ptr] - let elem_reg = self.alloc_consecutive_registers_for_call(3, span)?; - let index_reg = elem_reg + 1; - let coll_reg = elem_reg + 2; - - self.register_pool[elem_reg as usize] = true; - self.register_pool[index_reg as usize] = true; - self.register_pool[coll_reg as usize] = true; - self.next_register = self.next_register.max(coll_reg + 1); - - // Compile iterable into collection_ptr register - self.compile_typed_expr(iterable, coll_reg)?; - - // Initialize index to 0 - self.emit_b(OpCode::LoadI, index_reg, 0, span); - - 
// Jump to ForLoop check before executing body - let jump_to_forloop = self.emit_jump(OpCode::Jump, span); - - let loop_start = self.current_offset(); - - self.loop_stack.push(super::super::LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: true, - }); - - // Register the iterator variable pointing to element register - let resolved = Self::infer_to_resolved(inner_type); - self.add_local(iterator.to_string(), false, elem_reg, resolved); - - // Compile loop body - self.compile_typed_stmt(body)?; - - let continue_target = self.current_offset(); - - // Patch the initial jump to point here (to VecForLoop/ArrayForLoop) - self.patch_jump(jump_to_forloop); - - // Emit VecForLoop/ArrayForLoop: operates on elem_reg (consecutive regs) - let offset = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(opcode, elem_reg, -offset, span); - - let ctx = self.loop_stack.pop().ok_or_else(|| { - CompileError::new( - CompileErrorKind::ContinueOutsideLoop, - span, - self.source.clone(), - ) - })?; - for jump in ctx.continue_jumps { - let offset_to_target = (continue_target as isize - jump as isize - 1) as i16; - *self.current.bytecode_mut(jump) = - (OpCode::Jump as u32) << 24 | ((offset_to_target as u32) & 0xFFFFFF); - } - for jump in ctx.break_jumps { - self.patch_jump(jump); - } - - self.free_register(coll_reg); - self.free_register(index_reg); - self.end_scope(); - - Ok(()) - } - - fn compile_string_for_each( - &mut self, - iterator: &str, - iterable: &aelys_sema::TypedExpr, - body: &aelys_sema::TypedStmt, - span: Span, - ) -> Result<()> { - self.begin_scope(); - - // Allocate 3 consecutive registers: [char_result, byte_offset, string_ptr] - let char_reg = self.alloc_consecutive_registers_for_call(3, span)?; - let offset_reg = char_reg + 1; - let str_reg = char_reg + 2; - - self.register_pool[char_reg as usize] = true; - self.register_pool[offset_reg as usize] = true; - self.register_pool[str_reg as usize] = true; - 
self.next_register = self.next_register.max(str_reg + 1); - - // Compile iterable into string_ptr register - self.compile_typed_expr(iterable, str_reg)?; - - // Initialize byte_offset to 0 - self.emit_b(OpCode::LoadI, offset_reg, 0, span); - - // Jump to StringForLoop check before executing body - let jump_to_forloop = self.emit_jump(OpCode::Jump, span); - - let loop_start = self.current_offset(); - - self.loop_stack.push(super::super::LoopContext { - start: loop_start, - break_jumps: Vec::new(), - continue_jumps: Vec::new(), - is_for_loop: true, - }); - - // Register the iterator variable pointing to char_result register - self.add_local( - iterator.to_string(), - false, - char_reg, - aelys_sema::ResolvedType::String, - ); - - // Compile loop body - self.compile_typed_stmt(body)?; - - let continue_target = self.current_offset(); - - // Patch the initial jump to point here (to StringForLoop) - self.patch_jump(jump_to_forloop); - - // Emit StringForLoop: operates on char_reg (consecutive regs) - let offset = (self.current_offset() - loop_start + 1) as i16; - self.emit_b(OpCode::StringForLoop, char_reg, -offset, span); - - let ctx = self.loop_stack.pop().ok_or_else(|| { - CompileError::new( - CompileErrorKind::ContinueOutsideLoop, - span, - self.source.clone(), - ) - })?; - for jump in ctx.continue_jumps { - let offset_to_target = (continue_target as isize - jump as isize - 1) as i16; - *self.current.bytecode_mut(jump) = - (OpCode::Jump as u32) << 24 | ((offset_to_target as u32) & 0xFFFFFF); - } - for jump in ctx.break_jumps { - self.patch_jump(jump); - } - - self.free_register(str_reg); - self.free_register(offset_reg); - self.end_scope(); - - Ok(()) - } -} diff --git a/backend/src/compiler/stmt/mod.rs b/backend/src/compiler/stmt/mod.rs deleted file mode 100644 index d165124..0000000 --- a/backend/src/compiler/stmt/mod.rs +++ /dev/null @@ -1,79 +0,0 @@ -mod control_flow; -mod control_if; -mod decl; -mod expression; -mod looping; -mod typed; - -use super::Compiler; 
-use aelys_common::Result; -use aelys_syntax::Span; -use aelys_syntax::ast::{Stmt, StmtKind}; - -impl Compiler { - pub fn compile_stmt(&mut self, stmt: &Stmt) -> Result<()> { - match &stmt.kind { - StmtKind::Expression(expr) => self.compile_expression_stmt(expr), - StmtKind::Let { - name, - mutable, - type_annotation, - initializer, - is_pub, - } => self.compile_let( - name, - *mutable, - type_annotation.as_ref(), - initializer, - *is_pub, - ), - StmtKind::Block(stmts) => self.compile_block(stmts), - StmtKind::If { - condition, - then_branch, - else_branch, - } => self.compile_if(condition, then_branch, else_branch.as_deref()), - StmtKind::While { condition, body } => self.compile_while(condition, body), - StmtKind::For { - iterator, - start, - end, - inclusive, - step, - body, - } => self.compile_for( - iterator, - start, - end, - *inclusive, - step.as_ref().as_ref(), - body, - stmt.span, - ), - StmtKind::ForEach { .. } => { - // ForEach is handled through typed compilation path only - Ok(()) - } - StmtKind::Break => self.compile_break(stmt.span), - StmtKind::Continue => self.compile_continue(stmt.span), - StmtKind::Return(expr) => self.compile_return(expr.as_ref(), stmt.span), - StmtKind::Function(func) => self.compile_function(func), - StmtKind::Needs(needs) => self.compile_needs(needs, stmt.span), - StmtKind::StructDecl { .. 
} => Ok(()), - } - } - - pub fn compile_block(&mut self, stmts: &[Stmt]) -> Result<()> { - self.begin_scope(); - for s in stmts { - self.compile_stmt(s)?; - } - self.end_scope(); - Ok(()) - } - - // imports are resolved earlier, nothing to emit here - pub fn compile_needs(&mut self, _: &aelys_syntax::ast::NeedsStmt, _: Span) -> Result<()> { - Ok(()) - } -} diff --git a/backend/src/compiler/stmt/typed.rs b/backend/src/compiler/stmt/typed.rs deleted file mode 100644 index 73e96f4..0000000 --- a/backend/src/compiler/stmt/typed.rs +++ /dev/null @@ -1,83 +0,0 @@ -use super::Compiler; -use aelys_common::Result; - -impl Compiler { - pub fn compile_typed_stmt(&mut self, stmt: &aelys_sema::TypedStmt) -> Result<()> { - use aelys_sema::TypedStmtKind; - - match &stmt.kind { - TypedStmtKind::Expression(expr) => { - let reg = self.alloc_register()?; - self.compile_typed_expr(expr, reg)?; - self.free_register(reg); - Ok(()) - } - TypedStmtKind::Let { - name, - mutable, - initializer, - var_type, - is_pub, - } => { - let resolved_type = aelys_sema::ResolvedType::from_infer_type(var_type); - self.compile_typed_let( - name, - *mutable, - initializer, - &resolved_type, - *is_pub, - stmt.span, - ) - } - TypedStmtKind::Block(stmts) => { - self.begin_scope(); - for s in stmts { - self.compile_typed_stmt(s)?; - } - self.end_scope(); - Ok(()) - } - TypedStmtKind::If { - condition, - then_branch, - else_branch, - } => self.compile_typed_if_stmt( - condition, - then_branch, - else_branch.as_deref(), - stmt.span, - ), - TypedStmtKind::While { condition, body } => { - self.compile_typed_while(condition, body, stmt.span) - } - TypedStmtKind::For { - iterator, - start, - end, - inclusive, - step, - body, - } => self.compile_typed_for( - iterator, - start, - end, - *inclusive, - step.as_ref().as_ref(), - body, - stmt.span, - ), - TypedStmtKind::ForEach { - iterator, - iterable, - elem_type, - body, - } => self.compile_typed_for_each(iterator, iterable, elem_type, body, stmt.span), - 
TypedStmtKind::Return(expr) => self.compile_typed_return(expr.as_ref(), stmt.span), - TypedStmtKind::Break => self.compile_break(stmt.span), - TypedStmtKind::Continue => self.compile_continue(stmt.span), - TypedStmtKind::Function(func) => self.compile_typed_function(func), - TypedStmtKind::Needs(_needs) => Ok(()), - TypedStmtKind::StructDecl { .. } => Ok(()), - } - } -} diff --git a/backend/src/lib.rs b/backend/src/lib.rs deleted file mode 100644 index 00e2c66..0000000 --- a/backend/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -// typed AST -> bytecode - -pub mod compiler; -pub mod opcode_select; - -pub use compiler::{Compiler, Local, LoopContext, Scope}; diff --git a/backend/src/opcode_select.rs b/backend/src/opcode_select.rs deleted file mode 100644 index 356e607..0000000 --- a/backend/src/opcode_select.rs +++ /dev/null @@ -1,185 +0,0 @@ -// pick typed or generic opcode based on resolved types - -use aelys_bytecode::OpCode; -use aelys_sema::ResolvedType; -use aelys_syntax::ast::BinaryOp; - -pub fn select_opcode(op: BinaryOp, left: &ResolvedType, right: &ResolvedType) -> OpCode { - let needs_guard = left.needs_guard() || right.needs_guard(); - let left_inner = left.unwrap_uncertain(); - let right_inner = right.unwrap_uncertain(); - - match (left_inner, right_inner) { - // sized integer types collapse to int opcodes until LLVM backend - (l, r) if l.is_integer() && r.is_integer() => { - if needs_guard { - select_guarded_int_opcode(op) - } else { - select_typed_int_opcode(op) - } - } - // sized float types collapse to float opcodes until LLVM backend - (l, r) if l.is_float() && r.is_float() => { - if needs_guard { - select_guarded_float_opcode(op) - } else { - select_typed_float_opcode(op) - } - } - // int+float cross-type falls back to guarded float - (l, r) if (l.is_integer() && r.is_float()) || (l.is_float() && r.is_integer()) => { - select_guarded_float_opcode(op) - } - (ResolvedType::Dynamic, _) | (_, ResolvedType::Dynamic) => select_generic_opcode(op), - _ => 
select_generic_opcode(op), - } -} - -fn select_typed_int_opcode(op: BinaryOp) -> OpCode { - match op { - BinaryOp::Add => OpCode::AddII, - BinaryOp::Sub => OpCode::SubII, - BinaryOp::Mul => OpCode::MulII, - BinaryOp::Div => OpCode::DivII, - BinaryOp::Mod => OpCode::ModII, - BinaryOp::Lt => OpCode::LtII, - BinaryOp::Le => OpCode::LeII, - BinaryOp::Gt => OpCode::GtII, - BinaryOp::Ge => OpCode::GeII, - BinaryOp::Eq => OpCode::EqII, - BinaryOp::Ne => OpCode::NeII, - BinaryOp::Shl => OpCode::ShlII, - BinaryOp::Shr => OpCode::ShrII, - BinaryOp::BitAnd => OpCode::AndII, - BinaryOp::BitOr => OpCode::OrII, - BinaryOp::BitXor => OpCode::XorII, - } -} - -fn select_typed_float_opcode(op: BinaryOp) -> OpCode { - match op { - BinaryOp::Add => OpCode::AddFF, - BinaryOp::Sub => OpCode::SubFF, - BinaryOp::Mul => OpCode::MulFF, - BinaryOp::Div => OpCode::DivFF, - BinaryOp::Mod => OpCode::ModFF, - BinaryOp::Lt => OpCode::LtFF, - BinaryOp::Le => OpCode::LeFF, - BinaryOp::Gt => OpCode::GtFF, - BinaryOp::Ge => OpCode::GeFF, - BinaryOp::Eq => OpCode::EqFF, - BinaryOp::Ne => OpCode::NeFF, - BinaryOp::Shl => OpCode::Shl, - BinaryOp::Shr => OpCode::Shr, - BinaryOp::BitAnd => OpCode::BitAnd, - BinaryOp::BitOr => OpCode::BitOr, - BinaryOp::BitXor => OpCode::BitXor, - } -} - -fn select_guarded_int_opcode(op: BinaryOp) -> OpCode { - match op { - BinaryOp::Add => OpCode::AddIIG, - BinaryOp::Sub => OpCode::SubIIG, - BinaryOp::Mul => OpCode::MulIIG, - BinaryOp::Div => OpCode::DivIIG, - BinaryOp::Mod => OpCode::ModIIG, - BinaryOp::Lt => OpCode::LtIIG, - BinaryOp::Le => OpCode::LeIIG, - BinaryOp::Gt => OpCode::GtIIG, - BinaryOp::Ge => OpCode::GeIIG, - BinaryOp::Eq => OpCode::EqIIG, - BinaryOp::Ne => OpCode::NeIIG, - BinaryOp::Shl => OpCode::ShlII, - BinaryOp::Shr => OpCode::ShrII, - BinaryOp::BitAnd => OpCode::AndII, - BinaryOp::BitOr => OpCode::OrII, - BinaryOp::BitXor => OpCode::XorII, - } -} - -fn select_guarded_float_opcode(op: BinaryOp) -> OpCode { - match op { - BinaryOp::Add => 
OpCode::AddFFG, - BinaryOp::Sub => OpCode::SubFFG, - BinaryOp::Mul => OpCode::MulFFG, - BinaryOp::Div => OpCode::DivFFG, - BinaryOp::Mod => OpCode::ModFFG, - BinaryOp::Lt => OpCode::LtFFG, - BinaryOp::Le => OpCode::LeFFG, - BinaryOp::Gt => OpCode::GtFFG, - BinaryOp::Ge => OpCode::GeFFG, - BinaryOp::Eq => OpCode::EqFFG, - BinaryOp::Ne => OpCode::NeFFG, - BinaryOp::Shl => OpCode::Shl, - BinaryOp::Shr => OpCode::Shr, - BinaryOp::BitAnd => OpCode::BitAnd, - BinaryOp::BitOr => OpCode::BitOr, - BinaryOp::BitXor => OpCode::BitXor, - } -} - -fn select_generic_opcode(op: BinaryOp) -> OpCode { - match op { - BinaryOp::Add => OpCode::Add, - BinaryOp::Sub => OpCode::Sub, - BinaryOp::Mul => OpCode::Mul, - BinaryOp::Div => OpCode::Div, - BinaryOp::Mod => OpCode::Mod, - BinaryOp::Lt => OpCode::Lt, - BinaryOp::Le => OpCode::Le, - BinaryOp::Gt => OpCode::Gt, - BinaryOp::Ge => OpCode::Ge, - BinaryOp::Eq => OpCode::Eq, - BinaryOp::Ne => OpCode::Ne, - BinaryOp::Shl => OpCode::Shl, - BinaryOp::Shr => OpCode::Shr, - BinaryOp::BitAnd => OpCode::BitAnd, - BinaryOp::BitOr => OpCode::BitOr, - BinaryOp::BitXor => OpCode::BitXor, - } -} - -pub fn compute_result_type( - op: BinaryOp, - left: &ResolvedType, - right: &ResolvedType, -) -> ResolvedType { - let left_inner = left.unwrap_uncertain(); - let right_inner = right.unwrap_uncertain(); - let make_uncertain = left.needs_guard() || right.needs_guard(); - - let result = match op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div | BinaryOp::Mod => { - match (left_inner, right_inner) { - // same sized type from unification - (a, b) if a.is_integer() && b.is_integer() => a.clone(), - (a, b) if a.is_float() && b.is_float() => a.clone(), - // cross-type int+float falls back to float - (_, r) if left_inner.is_integer() && r.is_float() => r.clone(), - (l, _) if l.is_float() && right_inner.is_integer() => l.clone(), - (ResolvedType::String, ResolvedType::String) if matches!(op, BinaryOp::Add) => { - ResolvedType::String - } - _ => 
ResolvedType::Dynamic, - } - } - BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge | BinaryOp::Eq | BinaryOp::Ne => { - ResolvedType::Bool - } - BinaryOp::Shl | BinaryOp::Shr | BinaryOp::BitAnd | BinaryOp::BitOr | BinaryOp::BitXor => { - // sized integer types collapse to the left operand's type - if left_inner.is_integer() { - left_inner.clone() - } else { - ResolvedType::I64 - } - } - }; - - if make_uncertain && !matches!(result, ResolvedType::Bool | ResolvedType::Dynamic) { - ResolvedType::Uncertain(Box::new(result)) - } else { - result - } -} diff --git a/bytecode/Cargo.toml b/bytecode/Cargo.toml deleted file mode 100644 index 949c3fc..0000000 --- a/bytecode/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "aelys-bytecode" -version.workspace = true -edition = "2024" - -[dependencies] -thiserror = "2.0.17" - -[lib] -doctest = false \ No newline at end of file diff --git a/bytecode/README.md b/bytecode/README.md deleted file mode 100644 index f012016..0000000 --- a/bytecode/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# aelys-bytecode - -Bytecode instruction set and chunk representation. diff --git a/bytecode/src/asm/assembler.rs b/bytecode/src/asm/assembler.rs deleted file mode 100644 index 477aef8..0000000 --- a/bytecode/src/asm/assembler.rs +++ /dev/null @@ -1,574 +0,0 @@ -//! 
Assembler: Parses .aasm text and produces bytecode - -use super::lexer::{Lexer, Token}; -use crate::bytecode::{Function, GlobalLayout, UpvalueDescriptor}; -use crate::heap::Heap; -use crate::value::Value; -use std::collections::HashMap; -use thiserror::Error; - -/// Assembler error types -#[derive(Debug, Error)] -pub enum AssemblerError { - #[error("Parse error at line {line}: {message}")] - ParseError { line: usize, message: String }, - - #[error("Unknown opcode: {0}")] - UnknownOpcode(String), - - #[error("Undefined label: {0}")] - UndefinedLabel(String), - - #[error("Duplicate label: {0}")] - DuplicateLabel(String), - - #[error("Invalid register: {0}")] - InvalidRegister(String), - - #[error("Invalid number: {0}")] - InvalidNumber(String), - - #[error("Invalid string literal: {0}")] - InvalidString(String), - - #[error("Expected {expected}, got {got}")] - Expected { expected: String, got: String }, - - #[error("Unexpected end of input")] - UnexpectedEof, -} - -/// Result type for assembler operations -pub type Result = std::result::Result; - -/// Assemble .aasm source into bytecode functions -pub fn assemble(source: &str) -> Result<(Vec, Heap)> { - let mut parser = AasmParser::new(source); - parser.parse() -} - -/// Convenience function that takes a string -pub fn assemble_from_string(source: &str) -> Result<(Vec, Heap)> { - assemble(source) -} - -/// Parser for .aasm files -pub(super) struct AasmParser<'a> { - pub(super) lexer: Lexer<'a>, - pub(super) current: Token, - heap: Heap, -} - -impl<'a> AasmParser<'a> { - fn new(source: &'a str) -> Self { - let mut lexer = Lexer::new(source); - let current = lexer.next_token().unwrap_or(Token::Eof); - Self { - lexer, - current, - heap: Heap::new(), - } - } - - pub(super) fn advance(&mut self) -> Result { - let prev = std::mem::replace(&mut self.current, self.lexer.next_token()?); - Ok(prev) - } - - fn skip_newlines(&mut self) -> Result<()> { - while self.current == Token::Newline { - self.advance()?; - } - Ok(()) - } - 
- fn expect(&mut self, expected: Token) -> Result<()> { - if std::mem::discriminant(&self.current) == std::mem::discriminant(&expected) { - self.advance()?; - Ok(()) - } else { - Err(AssemblerError::Expected { - expected: format!("{:?}", expected), - got: format!("{:?}", self.current), - }) - } - } - - fn parse(&mut self) -> Result<(Vec, Heap)> { - let mut functions = Vec::new(); - - self.skip_newlines()?; - - if let Token::Directive(ref d) = self.current - && d == "version" - { - self.advance()?; - if let Token::Int(_) = self.current { - self.advance()?; - } - self.skip_newlines()?; - } - - while self.current != Token::Eof { - self.skip_newlines()?; - if self.current == Token::Eof { - break; - } - - if let Token::Directive(ref d) = self.current.clone() { - if d == "function" { - let func = self.parse_function()?; - functions.push(func); - } else { - return Err(AssemblerError::ParseError { - line: self.lexer.current_line(), - message: format!("Expected .function, got .{}", d), - }); - } - } else { - self.skip_newlines()?; - if self.current == Token::Eof { - break; - } - if let Token::Directive(_) = self.current { - continue; - } - return Err(AssemblerError::ParseError { - line: self.lexer.current_line(), - message: format!("Expected directive, got {:?}", self.current), - }); - } - } - - let heap = std::mem::take(&mut self.heap); - Ok((functions, heap)) - } - - fn parse_function(&mut self) -> Result { - // .function N - self.expect(Token::Directive("function".to_string()))?; - let _func_idx = match self.advance()? 
{ - Token::Int(n) => n as usize, - t => { - return Err(AssemblerError::Expected { - expected: "function index".to_string(), - got: format!("{:?}", t), - }); - } - }; - self.skip_newlines()?; - - let mut name = None; - let mut arity = 0u8; - let mut num_registers = 0u8; - let mut constants = Vec::new(); - let mut bytecode = Vec::new(); - let mut global_names = Vec::new(); - let mut upvalue_descriptors = Vec::new(); - let mut labels: HashMap = HashMap::new(); - let mut label_refs: Vec<(usize, String, bool)> = Vec::new(); // (offset, label, is_conditional) - - loop { - match &self.current { - Token::Directive(d) => match d.as_str() { - "name" => { - self.advance()?; - if let Token::String(s) = self.advance()? { - name = Some(s); - } - } - "arity" => { - self.advance()?; - if let Token::Int(n) = self.advance()? { - arity = n as u8; - } - } - "registers" => { - self.advance()?; - if let Token::Int(n) = self.advance()? { - num_registers = n as u8; - } - } - "globals" => { - self.advance()?; - self.skip_newlines()?; - global_names = self.parse_globals()?; - } - "constants" => { - self.advance()?; - self.skip_newlines()?; - constants = self.parse_constants()?; - } - "upvalues" => { - self.advance()?; - self.skip_newlines()?; - upvalue_descriptors = self.parse_upvalues()?; - } - "code" => { - self.advance()?; - self.skip_newlines()?; - self.parse_code(&mut bytecode, &mut labels, &mut label_refs)?; - break; - } - "function" => break, - _ => { - self.advance()?; - } - }, - Token::Newline => { - self.advance()?; - } - Token::Eof => break, - _ => break, - } - } - - /* - * This is the label patching phase. - * For each label reference, we look up the target label's offset, - * calculate the relative jump distance, and patch the instruction. 
- */ - for (offset, label, is_conditional) in label_refs { - let target = labels - .get(&label) - .ok_or_else(|| AssemblerError::UndefinedLabel(label.clone()))?; - - // Calculate relative offset: target - (offset + 1) - let relative = (*target as i32) - (offset as i32) - 1; - let relative = relative as i16; - - // Patch the instruction - let instr = bytecode[offset]; - let patched = if is_conditional { - // Keep the register in A field - let a = (instr >> 16) & 0xFF; - let op = instr >> 24; - (op << 24) | (a << 16) | ((relative as u16) as u32) - } else { - // Jump has no register - let op = instr >> 24; - (op << 24) | ((relative as u16) as u32) - }; - bytecode[offset] = patched; - } - - let mut func = Function::new(name, arity); - func.num_registers = num_registers; - func.set_bytecode(bytecode); - func.constants = constants; - func.global_layout = GlobalLayout::new(global_names); - func.upvalue_descriptors = upvalue_descriptors; - func.compute_global_layout_hash(); - - Ok(func) - } - - fn parse_constants(&mut self) -> Result> { - let mut constants = Vec::new(); - - loop { - match &self.current { - Token::Directive(_) => break, - Token::Eof => break, - Token::Newline => { - self.advance()?; - continue; - } - Token::Int(idx) => { - // Parse constant: INDEX: TYPE VALUE - let _idx = *idx; - self.advance()?; - self.expect(Token::Colon)?; - - let value = self.parse_constant_value()?; - constants.push(value); - self.skip_newlines()?; - } - _ => break, - } - } - - Ok(constants) - } - - /// Parse global names section: INDEX: "name" - fn parse_globals(&mut self) -> Result> { - let mut globals = Vec::new(); - - loop { - match &self.current { - Token::Directive(_) => break, - Token::Eof => break, - Token::Newline => { - self.advance()?; - continue; - } - Token::Int(idx) => { - // Parse global: INDEX: "name" - let idx = *idx as usize; - self.advance()?; - self.expect(Token::Colon)?; - - if let Token::String(name) = self.advance()? 
{ - // Ensure the vec is large enough - if globals.len() <= idx { - globals.resize(idx + 1, String::new()); - } - globals[idx] = name; - } - self.skip_newlines()?; - } - _ => break, - } - } - - Ok(globals) - } - - /// Parse upvalue descriptors section: INDEX: (local|upvalue) INDEX - fn parse_upvalues(&mut self) -> Result> { - let mut upvalues = Vec::new(); - - loop { - match &self.current { - Token::Directive(_) => break, - Token::Eof => break, - Token::Newline => { - self.advance()?; - continue; - } - Token::Int(idx) => { - // Parse upvalue: INDEX: (local|upvalue) INDEX - let idx = *idx as usize; - self.advance()?; - self.expect(Token::Colon)?; - - // Parse kind: "local" or "upvalue" - let is_local = match &self.current { - Token::Ident(s) if s == "local" => { - self.advance()?; - true - } - Token::Ident(s) if s == "upvalue" => { - self.advance()?; - false - } - _ => { - return Err(AssemblerError::Expected { - expected: "local or upvalue".to_string(), - got: format!("{:?}", self.current), - }); - } - }; - - // Parse the index - let index = self.parse_u8()?; - - // Ensure the vec is large enough - if upvalues.len() <= idx { - upvalues.resize( - idx + 1, - UpvalueDescriptor { - is_local: true, - index: 0, - }, - ); - } - upvalues[idx] = UpvalueDescriptor { is_local, index }; - self.skip_newlines()?; - } - _ => break, - } - } - - Ok(upvalues) - } - - /// Parse a constant value of the form TYPE VALUE - fn parse_constant_value(&mut self) -> Result { - match self.advance()? { - Token::Ident(type_name) => { - match type_name.as_str() { - "int" => { - if let Token::Int(n) = self.advance()? { - Ok(Value::int(n)) - } else { - Err(AssemblerError::Expected { - expected: "integer".to_string(), - got: format!("{:?}", self.current), - }) - } - } - "float" => match self.advance()? 
{ - Token::Float(f) => Ok(Value::float(f)), - Token::Int(n) => Ok(Value::float(n as f64)), - Token::Ident(s) if s == "nan" => Ok(Value::float(f64::NAN)), - Token::Ident(s) if s == "inf" => Ok(Value::float(f64::INFINITY)), - t => Err(AssemblerError::Expected { - expected: "float".to_string(), - got: format!("{:?}", t), - }), - }, - "bool" => match self.advance()? { - Token::Bool(b) => Ok(Value::bool(b)), - Token::Ident(s) if s == "true" => Ok(Value::bool(true)), - Token::Ident(s) if s == "false" => Ok(Value::bool(false)), - t => Err(AssemblerError::Expected { - expected: "bool".to_string(), - got: format!("{:?}", t), - }), - }, - "string" => { - if let Token::String(s) = self.advance()? { - let str_ref = self.heap.intern_string(&s); - Ok(Value::ptr(str_ref.index())) - } else { - Err(AssemblerError::Expected { - expected: "string".to_string(), - got: format!("{:?}", self.current), - }) - } - } - "ptr" => { - if let Token::Int(n) = self.advance()? { - Ok(Value::ptr(n as usize)) - } else { - Err(AssemblerError::Expected { - expected: "pointer value".to_string(), - got: format!("{:?}", self.current), - }) - } - } - "func" => { - // func @N - self.expect(Token::At)?; - if let Token::Int(n) = self.advance()? { - // Encode as nested function marker (uses dedicated tag) - Ok(Value::nested_fn_marker((n - 1) as usize)) // -1 because main is @0 - } else { - Err(AssemblerError::Expected { - expected: "function index".to_string(), - got: format!("{:?}", self.current), - }) - } - } - "null" => Ok(Value::null()), - "native" => { - if let Token::String(_) = self.advance()? 
{ - // We can't recreate native functions, return null - Ok(Value::null()) - } else { - Ok(Value::null()) - } - } - _ => Err(AssemblerError::Expected { - expected: "constant type".to_string(), - got: type_name, - }), - } - } - Token::Null => Ok(Value::null()), - t => Err(AssemblerError::Expected { - expected: "constant type".to_string(), - got: format!("{:?}", t), - }), - } - } - - fn parse_code( - &mut self, - bytecode: &mut Vec, - labels: &mut HashMap, - label_refs: &mut Vec<(usize, String, bool)>, - ) -> Result<()> { - loop { - match &self.current { - Token::Directive(_) => break, - Token::Eof => break, - Token::Newline => { - self.advance()?; - continue; - } - Token::LabelRef(name) => { - // This is a label definition (L0:) - let label_name = name.clone(); - self.advance()?; - if self.current == Token::Colon { - self.advance()?; - if labels.contains_key(&label_name) { - return Err(AssemblerError::DuplicateLabel(label_name)); - } - labels.insert(label_name, bytecode.len()); - } else { - return Err(AssemblerError::Expected { - expected: "colon after label".to_string(), - got: format!("{:?}", self.current), - }); - } - } - Token::Int(_) => { - // Skip instruction offset - self.advance()?; - self.expect(Token::Colon)?; - self.parse_instruction(bytecode, label_refs)?; - } - Token::Ident(_) => { - // Instruction without offset - self.parse_instruction(bytecode, label_refs)?; - } - _ => { - return Err(AssemblerError::ParseError { - line: self.lexer.current_line(), - message: format!("Unexpected token in code: {:?}", self.current), - }); - } - } - } - Ok(()) - } - - pub(super) fn parse_register(&mut self) -> Result { - match self.advance()? { - Token::Register(r) => Ok(r), - t => Err(AssemblerError::Expected { - expected: "register".to_string(), - got: format!("{:?}", t), - }), - } - } - - pub(super) fn parse_u8(&mut self) -> Result { - match self.advance()? 
{ - Token::Int(n) if (0..=255).contains(&n) => Ok(n as u8), - Token::Int(n) => Err(AssemblerError::InvalidNumber(format!( - "{} (must be 0-255)", - n - ))), - t => Err(AssemblerError::Expected { - expected: "u8".to_string(), - got: format!("{:?}", t), - }), - } - } - - pub(super) fn parse_i16(&mut self) -> Result { - match self.advance()? { - Token::Int(n) if n >= i16::MIN as i64 && n <= i16::MAX as i64 => Ok(n as i16), - Token::Int(n) => Err(AssemblerError::InvalidNumber(format!( - "{} (must fit i16)", - n - ))), - t => Err(AssemblerError::Expected { - expected: "i16".to_string(), - got: format!("{:?}", t), - }), - } - } - - pub(super) fn skip_comma(&mut self) -> Result<()> { - if self.current == Token::Comma { - self.advance()?; - } - Ok(()) - } -} diff --git a/bytecode/src/asm/binary.rs b/bytecode/src/asm/binary.rs deleted file mode 100644 index d4f46ee..0000000 --- a/bytecode/src/asm/binary.rs +++ /dev/null @@ -1,725 +0,0 @@ -//! Binary serialization for .avbc format - -use crate::bytecode::{Function, GlobalLayout, UpvalueDescriptor}; -use crate::heap::Heap; -use crate::object::{GcRef, ObjectKind}; -use crate::value::Value; -use std::io::{self, Cursor, Read}; -use thiserror::Error; - -/// Magic bytes for .avbc files -pub const MAGIC: &[u8; 4] = b"VBXQ"; - -/// Current format version -pub const VERSION: u16 = 1; - -const MAX_BYTECODE_LEN: usize = 1_000_000; -const MAX_CONSTANTS: usize = 65_535; -const MAX_NESTED_FUNCTIONS: usize = 4_096; -const MAX_UPVALUE_DESCRIPTORS: usize = 256; -const MAX_LINES: usize = 1_000_000; -const MAX_GLOBAL_NAMES: usize = 65_535; -const MAX_STRING_LEN: usize = 1_000_000; -const MAX_NESTING_DEPTH: usize = 64; -const MAX_SECTION_LEN: usize = 256 * 1024 * 1024; - -const SECTION_MANIFEST: u32 = u32::from_le_bytes(*b"MANF"); -const SECTION_BUNDLES: u32 = u32::from_le_bytes(*b"NBND"); - -/// Result type for deserialization with manifest and bundles -pub type DeserializeResult = Result<(Function, Heap, Option>, Vec)>; - -#[derive(Debug, 
Clone, PartialEq, Eq)] -pub struct NativeBundle { - pub name: String, - pub target: String, - pub checksum: String, - pub bytes: Vec, -} - -/// Binary format errors -#[derive(Debug, Error)] -pub enum BinaryError { - #[error("IO error: {0}")] - Io(#[from] io::Error), - - #[error("Invalid magic number")] - InvalidMagic, - - #[error("Unsupported version: {0}")] - UnsupportedVersion(u16), - - #[error("Invalid constant type: {0}")] - InvalidConstantType(u8), - - #[error("Invalid nested function index: {index} (max: {max})")] - InvalidNestedFunctionIndex { index: usize, max: usize }, - - #[error("Invalid UTF-8 in string")] - InvalidUtf8, - - #[error("Unexpected end of file")] - UnexpectedEof, - - #[error("Limit exceeded: {what} (max {limit})")] - LimitExceeded { what: &'static str, limit: usize }, -} - -pub type Result = std::result::Result; - -/// Serialize a function and its heap to .avbc binary format -pub fn serialize(func: &Function, heap: &Heap) -> Vec { - let mut writer = BinaryWriter::new(); - writer.write_program(func, heap); - writer.into_bytes() -} - -/// Serialize a function and heap to .avbc with optional manifest and bundles. -pub fn serialize_with_manifest( - func: &Function, - heap: &Heap, - manifest: Option<&[u8]>, - bundles: Option<&[NativeBundle]>, -) -> Vec { - let mut writer = BinaryWriter::new(); - writer.write_program(func, heap); - if let Some(manifest_bytes) = manifest { - writer.write_section(SECTION_MANIFEST, manifest_bytes); - } - if let Some(bundles) = bundles { - let data = build_bundles_section(bundles); - writer.write_section(SECTION_BUNDLES, &data); - } - writer.into_bytes() -} - -/// Deserialize .avbc binary format to a function and heap -pub fn deserialize(data: &[u8]) -> Result<(Function, Heap)> { - let reader = BinaryReader::new(data); - reader.read_program() -} - -/// Deserialize .avbc binary format to a function, heap, optional manifest, and bundles. 
-pub fn deserialize_with_manifest(data: &[u8]) -> DeserializeResult { - let reader = BinaryReader::new(data); - reader.read_program_with_sections() -} - -/// Binary writer for .avbc format -struct BinaryWriter { - buffer: Vec, -} - -impl BinaryWriter { - fn new() -> Self { - Self { buffer: Vec::new() } - } - - fn into_bytes(self) -> Vec { - self.buffer - } - - fn write_program(&mut self, func: &Function, heap: &Heap) { - self.write_bytes(MAGIC); - self.write_u16(VERSION); - self.write_u16(0); // Flags (reserved) - - let func_count = count_functions(func); - self.write_u32(func_count as u32); - self.write_u32(0); // Reserved - - self.write_function(func, heap); - } - - fn write_section(&mut self, tag: u32, data: &[u8]) { - assert!(data.len() <= u32::MAX as usize, "section data too large"); - self.write_u32(tag); - self.write_u32(data.len() as u32); - self.write_bytes(data); - } - - fn write_function(&mut self, func: &Function, heap: &Heap) { - // Name - if let Some(name) = &func.name { - self.write_u16(name.len() as u16); - self.write_bytes(name.as_bytes()); - } else { - self.write_u16(0); - } - - // Metadata - self.write_u8(func.arity); - self.write_u8(func.num_registers); - - // Constants - self.write_u16(func.constants.len() as u16); - for constant in &func.constants { - self.write_constant(constant, heap); - } - - // Bytecode - strip cache state during serialization - self.write_u32(func.bytecode.len() as u32); - let mut skip_cache_words = 0u32; - for &instr in func.bytecode.iter() { - if skip_cache_words > 0 { - // Reset cache words to 0 - self.write_u32(0); - skip_cache_words -= 1; - } else { - let opcode = (instr >> 24) as u8; - // CallGlobalMono (78) -> CallGlobal (77) and skip next 2 words - // CallGlobal (77) -> keep as-is but still reset cache words - if opcode == 78 { - // Rewrite opcode from 78 to 77 - let new_instr = (instr & 0x00FFFFFF) | (77 << 24); - self.write_u32(new_instr); - skip_cache_words = 2; - } else if opcode == 77 { - // CallGlobal - 
write as-is but reset cache words - self.write_u32(instr); - skip_cache_words = 2; - } else { - self.write_u32(instr); - } - } - } - - // Nested functions - self.write_u16(func.nested_functions.len() as u16); - for nested in &func.nested_functions { - self.write_function(nested, heap); - } - - // Upvalue descriptors (needed for closures) - self.write_u16(func.upvalue_descriptors.len() as u16); - for desc in &func.upvalue_descriptors { - self.write_u8(if desc.is_local { 1 } else { 0 }); - self.write_u8(desc.index); - } - - // Line info (RLE) - self.write_u16(func.lines.len() as u16); - for &(count, line) in &func.lines { - self.write_u16(count); - self.write_u32(line); - } - - // Global names (for indexed global access) - self.write_u16(func.global_layout.names().len() as u16); - for name in func.global_layout.names() { - self.write_u16(name.len() as u16); - self.write_bytes(name.as_bytes()); - } - } - - fn write_constant(&mut self, value: &Value, heap: &Heap) { - if value.is_null() { - self.write_u8(0); // TAG_NULL - } else if let Some(b) = value.as_bool() { - self.write_u8(1); // TAG_BOOL - self.write_u8(if b { 1 } else { 0 }); - } else if let Some(n) = value.as_int() { - self.write_u8(2); // TAG_INT - self.write_i64(n); - } else if value.is_float() { - if let Some(f) = value.as_float() { - self.write_u8(3); // TAG_FLOAT - self.write_f64(f); - } - } else if let Some(func_idx) = value.as_nested_fn_marker() { - // Nested function marker (uses dedicated tag) - self.write_u8(5); // TAG_FUNC - self.write_u32(func_idx as u32); - } else if let Some(ptr) = value.as_ptr() { - // Try to resolve from heap - if let Some(obj) = heap.get(GcRef::new(ptr)) { - match &obj.kind { - ObjectKind::String(s) => { - self.write_u8(4); // TAG_STRING - let bytes = s.as_bytes(); - self.write_u32(bytes.len() as u32); - self.write_bytes(bytes); - } - _ => { - // Other object types: store as ptr - self.write_u8(6); // TAG_PTR - self.write_u64(ptr as u64); - } - } - } else { - self.write_u8(6); 
// TAG_PTR - self.write_u64(ptr as u64); - } - } else { - // Unknown: store as null - self.write_u8(0); - } - } - - fn write_bytes(&mut self, bytes: &[u8]) { - self.buffer.extend_from_slice(bytes); - } - - fn write_u8(&mut self, v: u8) { - self.buffer.push(v); - } - - fn write_u16(&mut self, v: u16) { - self.buffer.extend_from_slice(&v.to_le_bytes()); - } - - fn write_u32(&mut self, v: u32) { - self.buffer.extend_from_slice(&v.to_le_bytes()); - } - - fn write_u64(&mut self, v: u64) { - self.buffer.extend_from_slice(&v.to_le_bytes()); - } - - fn write_i64(&mut self, v: i64) { - self.buffer.extend_from_slice(&v.to_le_bytes()); - } - - fn write_f64(&mut self, v: f64) { - self.buffer.extend_from_slice(&v.to_le_bytes()); - } -} - -fn build_bundles_section(bundles: &[NativeBundle]) -> Vec { - let mut buf = Vec::new(); - write_u32_to(&mut buf, bundles.len() as u32); - for bundle in bundles { - write_string_to(&mut buf, &bundle.name); - write_string_to(&mut buf, &bundle.target); - write_string_to(&mut buf, &bundle.checksum); - write_bytes_to(&mut buf, &bundle.bytes); - } - assert!( - buf.len() <= MAX_SECTION_LEN, - "native bundle section too large" - ); - buf -} - -fn write_u32_to(buf: &mut Vec, v: u32) { - buf.extend_from_slice(&v.to_le_bytes()); -} - -fn write_string_to(buf: &mut Vec, s: &str) { - assert!(s.len() <= u32::MAX as usize, "string too large for section"); - write_u32_to(buf, s.len() as u32); - buf.extend_from_slice(s.as_bytes()); -} - -fn write_bytes_to(buf: &mut Vec, bytes: &[u8]) { - assert!( - bytes.len() <= u32::MAX as usize, - "bytes too large for section" - ); - write_u32_to(buf, bytes.len() as u32); - buf.extend_from_slice(bytes); -} - -/// Binary reader for .avbc format -struct BinaryReader<'a> { - cursor: Cursor<&'a [u8]>, - heap: Heap, -} - -impl<'a> BinaryReader<'a> { - fn new(data: &'a [u8]) -> Self { - Self { - cursor: Cursor::new(data), - heap: Heap::new(), - } - } - - fn read_program(mut self) -> Result<(Function, Heap)> { - // Header - let mut 
magic = [0u8; 4]; - self.cursor.read_exact(&mut magic)?; - if &magic != MAGIC { - return Err(BinaryError::InvalidMagic); - } - - let version = self.read_u16()?; - if version != VERSION { - return Err(BinaryError::UnsupportedVersion(version)); - } - - let _flags = self.read_u16()?; - let _func_count = self.read_u32()?; - let _reserved = self.read_u32()?; - - // Read main function (which includes nested functions) - let func = self.read_function(0)?; - - Ok((func, self.heap)) - } - - fn read_program_with_sections(mut self) -> DeserializeResult { - // Header - let mut magic = [0u8; 4]; - self.cursor.read_exact(&mut magic)?; - if &magic != MAGIC { - return Err(BinaryError::InvalidMagic); - } - - let version = self.read_u16()?; - if version != VERSION { - return Err(BinaryError::UnsupportedVersion(version)); - } - - let _flags = self.read_u16()?; - let _func_count = self.read_u32()?; - let _reserved = self.read_u32()?; - - let func = self.read_function(0)?; - let (manifest, bundles) = self.read_sections()?; - - Ok((func, self.heap, manifest, bundles)) - } - - fn read_function(&mut self, depth: usize) -> Result { - if depth > MAX_NESTING_DEPTH { - return Err(BinaryError::LimitExceeded { - what: "function nesting depth", - limit: MAX_NESTING_DEPTH, - }); - } - // Name - let name_len = self.read_u16()? as usize; - if name_len > MAX_STRING_LEN { - return Err(BinaryError::LimitExceeded { - what: "function name length", - limit: MAX_STRING_LEN, - }); - } - let name = if name_len > 0 { - let mut bytes = vec![0u8; name_len]; - self.cursor.read_exact(&mut bytes)?; - Some(String::from_utf8(bytes).map_err(|_| BinaryError::InvalidUtf8)?) - } else { - None - }; - - // Metadata - let arity = self.read_u8()?; - let num_registers = self.read_u8()?; - - // Constants - let const_count = self.read_u16()? 
as usize; - if const_count > MAX_CONSTANTS { - return Err(BinaryError::LimitExceeded { - what: "constants", - limit: MAX_CONSTANTS, - }); - } - let mut constants = Vec::with_capacity(const_count); - for _ in 0..const_count { - let value = self.read_constant()?; - constants.push(value); - } - - // Bytecode - let bc_len = self.read_u32()? as usize; - if bc_len > MAX_BYTECODE_LEN { - return Err(BinaryError::LimitExceeded { - what: "bytecode length", - limit: MAX_BYTECODE_LEN, - }); - } - let mut bytecode = Vec::with_capacity(bc_len); - for _ in 0..bc_len { - bytecode.push(self.read_u32()?); - } - - // Nested functions - let nested_count = self.read_u16()? as usize; - if nested_count > MAX_NESTED_FUNCTIONS { - return Err(BinaryError::LimitExceeded { - what: "nested functions", - limit: MAX_NESTED_FUNCTIONS, - }); - } - - Self::validate_func_markers(&constants, nested_count)?; - - let mut nested_functions = Vec::with_capacity(nested_count); - for _ in 0..nested_count { - nested_functions.push(self.read_function(depth + 1)?); - } - - // Upvalue descriptors (needed for closures) - let upvalue_count = self.read_u16()? as usize; - if upvalue_count > MAX_UPVALUE_DESCRIPTORS { - return Err(BinaryError::LimitExceeded { - what: "upvalue descriptors", - limit: MAX_UPVALUE_DESCRIPTORS, - }); - } - let mut upvalue_descriptors = Vec::with_capacity(upvalue_count); - for _ in 0..upvalue_count { - let is_local = self.read_u8()? != 0; - let index = self.read_u8()?; - upvalue_descriptors.push(UpvalueDescriptor { is_local, index }); - } - - // Line info - let lines_count = self.read_u16()? 
as usize; - if lines_count > MAX_LINES { - return Err(BinaryError::LimitExceeded { - what: "line info entries", - limit: MAX_LINES, - }); - } - let mut lines = Vec::with_capacity(lines_count); - for _ in 0..lines_count { - let count = self.read_u16()?; - let line = self.read_u32()?; - lines.push((count, line)); - } - - // Global names (for indexed global access) - let global_names_count = self.read_u16()? as usize; - if global_names_count > MAX_GLOBAL_NAMES { - return Err(BinaryError::LimitExceeded { - what: "global names", - limit: MAX_GLOBAL_NAMES, - }); - } - let mut global_names = Vec::with_capacity(global_names_count); - for _ in 0..global_names_count { - let name_len = self.read_u16()? as usize; - if name_len > MAX_STRING_LEN { - return Err(BinaryError::LimitExceeded { - what: "global name length", - limit: MAX_STRING_LEN, - }); - } - let name = if name_len > 0 { - let mut bytes = vec![0u8; name_len]; - self.cursor.read_exact(&mut bytes)?; - String::from_utf8(bytes).map_err(|_| { - BinaryError::Io(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Invalid UTF-8 in global name", - )) - })? 
- } else { - String::new() - }; - global_names.push(name); - } - - // Compute global_layout_hash from global layout names - let mut func = Function::new(name, arity); - func.num_registers = num_registers; - func.set_bytecode(bytecode); - func.constants = constants; - func.nested_functions = nested_functions; - func.upvalue_descriptors = upvalue_descriptors; - func.lines = lines; - func.global_layout = GlobalLayout::new(global_names); - func.compute_global_layout_hash(); - - Ok(func) - } - - fn validate_func_markers(constants: &[Value], nested_count: usize) -> Result<()> { - for constant in constants { - if let Some(func_idx) = constant.as_nested_fn_marker() - && func_idx >= nested_count - { - return Err(BinaryError::InvalidNestedFunctionIndex { - index: func_idx, - max: nested_count.saturating_sub(1), - }); - } - } - Ok(()) - } - - fn read_constant(&mut self) -> Result { - let tag = self.read_u8()?; - match tag { - 0 => Ok(Value::null()), // TAG_NULL - 1 => { - // TAG_BOOL - let b = self.read_u8()? != 0; - Ok(Value::bool(b)) - } - 2 => { - // TAG_INT - let n = self.read_i64()?; - Ok(Value::int(n)) - } - 3 => { - // TAG_FLOAT - let f = self.read_f64()?; - Ok(Value::float(f)) - } - 4 => { - // TAG_STRING - let len = self.read_u32()? as usize; - if len > MAX_STRING_LEN { - return Err(BinaryError::LimitExceeded { - what: "string length", - limit: MAX_STRING_LEN, - }); - } - let mut bytes = vec![0u8; len]; - self.cursor.read_exact(&mut bytes)?; - let s = String::from_utf8(bytes).map_err(|_| BinaryError::InvalidUtf8)?; - let str_ref = self.heap.intern_string(&s); - Ok(Value::ptr(str_ref.index())) - } - 5 => { - // TAG_FUNC (nested function marker with dedicated tag) - let func_idx = self.read_u32()? as usize; - Ok(Value::nested_fn_marker(func_idx)) - } - 6 => { - // TAG_PTR - let ptr = self.read_u64()? 
as usize; - Ok(Value::ptr(ptr)) - } - _ => Err(BinaryError::InvalidConstantType(tag)), - } - } - - fn read_sections(&mut self) -> Result<(Option>, Vec)> { - let mut manifest = None; - let mut bundles = Vec::new(); - while self.remaining() > 0 { - let tag = self.read_u32()?; - let len = self.read_u32()? as usize; - if len > MAX_SECTION_LEN { - return Err(BinaryError::LimitExceeded { - what: "section length", - limit: MAX_SECTION_LEN, - }); - } - let data = self.read_bytes(len)?; - match tag { - SECTION_MANIFEST => { - manifest = Some(data); - } - SECTION_BUNDLES => { - let mut parsed = parse_bundles_section(&data)?; - bundles.append(&mut parsed); - } - _ => {} - } - } - Ok((manifest, bundles)) - } - - fn remaining(&self) -> usize { - let pos = self.cursor.position() as usize; - let len = self.cursor.get_ref().len(); - len.saturating_sub(pos) - } - - fn read_bytes(&mut self, len: usize) -> Result> { - let mut buf = vec![0u8; len]; - self.cursor.read_exact(&mut buf)?; - Ok(buf) - } - - fn read_u8(&mut self) -> Result { - let mut buf = [0u8; 1]; - self.cursor.read_exact(&mut buf)?; - Ok(buf[0]) - } - - fn read_u16(&mut self) -> Result { - let mut buf = [0u8; 2]; - self.cursor.read_exact(&mut buf)?; - Ok(u16::from_le_bytes(buf)) - } - - fn read_u32(&mut self) -> Result { - let mut buf = [0u8; 4]; - self.cursor.read_exact(&mut buf)?; - Ok(u32::from_le_bytes(buf)) - } - - fn read_u64(&mut self) -> Result { - let mut buf = [0u8; 8]; - self.cursor.read_exact(&mut buf)?; - Ok(u64::from_le_bytes(buf)) - } - - fn read_i64(&mut self) -> Result { - let mut buf = [0u8; 8]; - self.cursor.read_exact(&mut buf)?; - Ok(i64::from_le_bytes(buf)) - } - - fn read_f64(&mut self) -> Result { - let mut buf = [0u8; 8]; - self.cursor.read_exact(&mut buf)?; - Ok(f64::from_le_bytes(buf)) - } -} - -fn parse_bundles_section(data: &[u8]) -> Result> { - let mut cursor = Cursor::new(data); - let count = read_u32_from(&mut cursor)? 
as usize; - let mut bundles = Vec::with_capacity(count); - for _ in 0..count { - let name = read_string_from(&mut cursor, "bundle name")?; - let target = read_string_from(&mut cursor, "bundle target")?; - let checksum = read_string_from(&mut cursor, "bundle checksum")?; - let bytes = read_bytes_from(&mut cursor, "bundle bytes")?; - bundles.push(NativeBundle { - name, - target, - checksum, - bytes, - }); - } - Ok(bundles) -} - -fn read_u32_from(cursor: &mut Cursor<&[u8]>) -> Result { - let mut buf = [0u8; 4]; - cursor.read_exact(&mut buf)?; - Ok(u32::from_le_bytes(buf)) -} - -fn read_string_from(cursor: &mut Cursor<&[u8]>, what: &'static str) -> Result { - let len = read_u32_from(cursor)? as usize; - if len > MAX_STRING_LEN { - return Err(BinaryError::LimitExceeded { - what, - limit: MAX_STRING_LEN, - }); - } - let mut buf = vec![0u8; len]; - cursor.read_exact(&mut buf)?; - String::from_utf8(buf).map_err(|_| BinaryError::InvalidUtf8) -} - -fn read_bytes_from(cursor: &mut Cursor<&[u8]>, what: &'static str) -> Result> { - let len = read_u32_from(cursor)? as usize; - if len > MAX_SECTION_LEN { - return Err(BinaryError::LimitExceeded { - what, - limit: MAX_SECTION_LEN, - }); - } - let mut buf = vec![0u8; len]; - cursor.read_exact(&mut buf)?; - Ok(buf) -} - -/// Count total number of functions (including nested) -fn count_functions(func: &Function) -> usize { - let mut count = 1; - for nested in &func.nested_functions { - count += count_functions(nested); - } - count -} diff --git a/bytecode/src/asm/disasm.rs b/bytecode/src/asm/disasm.rs deleted file mode 100644 index e0f5a09..0000000 --- a/bytecode/src/asm/disasm.rs +++ /dev/null @@ -1,1078 +0,0 @@ -// disassembler: bytecode -> .aasm text - -use crate::bytecode::{Function, OpCode, decode_a, decode_b, decode_c}; -use crate::heap::Heap; -use crate::object::{GcRef, ObjectKind}; -use crate::value::Value; -use std::collections::{HashMap, HashSet}; -use std::fmt::Write; - -macro_rules! 
writeln_ignore { - ($dst:expr) => { let _ = writeln!($dst); }; - ($dst:expr, $($arg:tt)*) => { let _ = writeln!($dst, $($arg)*); }; -} - -#[derive(Debug, Clone, Default)] -pub struct DisassemblerOptions { - pub include_line_info: bool, -} - -pub fn disassemble(func: &Function, heap: Option<&Heap>) -> String { - disassemble_with_options(func, heap, &DisassemblerOptions::default()) -} - -pub fn disassemble_with_options( - func: &Function, - heap: Option<&Heap>, - options: &DisassemblerOptions, -) -> String { - let mut output = String::new(); - let mut ctx = DisasmContext::new(heap, options); - - writeln_ignore!(output, "; Aelys Assembly (.aasm)"); - writeln_ignore!(output, "; Disassembled from bytecode"); - writeln_ignore!(output); - writeln_ignore!(output, ".version 1"); - writeln_ignore!(output); - - let mut all_functions = Vec::new(); - collect_functions(func, &mut all_functions); - - for (idx, f) in all_functions.iter().enumerate() { - if idx > 0 { - writeln_ignore!(output); - } - ctx.set_function_context(f, &all_functions); - ctx.disassemble_function(&mut output, f, idx); - } - - output -} - -pub fn disassemble_to_string(func: &Function, heap: Option<&Heap>) -> String { - disassemble(func, heap) -} - -fn collect_functions<'a>(func: &'a Function, out: &mut Vec<&'a Function>) { - out.push(func); - for f in &func.nested_functions { - collect_functions(f, out); - } -} - -struct DisasmContext<'a> { - heap: Option<&'a Heap>, - options: &'a DisassemblerOptions, - global_names: Vec, - nested_fn_names: Vec>, -} - -impl<'a> DisasmContext<'a> { - fn new(heap: Option<&'a Heap>, options: &'a DisassemblerOptions) -> Self { - Self { - heap, - options, - global_names: Vec::new(), - nested_fn_names: Vec::new(), - } - } - - fn set_function_context(&mut self, func: &Function, all_functions: &[&Function]) { - self.global_names = func.global_layout.names().to_vec(); - self.nested_fn_names = func - .nested_functions - .iter() - .map(|f| f.name.clone()) - .collect(); - // also include 
names from all_functions for @N references - if self.nested_fn_names.is_empty() { - self.nested_fn_names = all_functions - .iter() - .skip(1) - .map(|f| f.name.clone()) - .collect(); - } - } - - fn global_name(&self, idx: usize) -> Option<&str> { - self.global_names.get(idx).map(|s| s.as_str()) - } - - fn nested_fn_name(&self, idx: usize) -> Option<&str> { - self.nested_fn_names.get(idx).and_then(|o| o.as_deref()) - } - - fn disassemble_function(&self, output: &mut String, func: &Function, func_idx: usize) { - writeln_ignore!(output, "; !== FUN {}", func_idx); - writeln_ignore!(output, ".function {}", func_idx); - - if let Some(name) = &func.name { - writeln_ignore!(output, " .name \"{}\"", escape_string(name)); - } - writeln_ignore!(output, " .arity {}", func.arity); - writeln_ignore!(output, " .registers {}", func.num_registers); - - // Output global names for ALL functions (needed for indexed global access and module loading) - if !func.global_layout.names().is_empty() { - writeln_ignore!(output); - writeln_ignore!(output, " .globals"); - for (idx, name) in func.global_layout.names().iter().enumerate() { - writeln_ignore!(output, " {}: \"{}\"", idx, escape_string(name)); - } - } - writeln_ignore!(output); - - if !func.constants.is_empty() { - writeln_ignore!(output, " .constants"); - for (idx, constant) in func.constants.iter().enumerate() { - let const_str = self.format_constant(constant, &func.nested_functions); - writeln_ignore!(output, " {}: {}", idx, const_str); - } - writeln_ignore!(output); - } - - // Output upvalue descriptors if present - if !func.upvalue_descriptors.is_empty() { - writeln_ignore!(output, " .upvalues"); - for (idx, desc) in func.upvalue_descriptors.iter().enumerate() { - let kind = if desc.is_local { "local" } else { "upvalue" }; - writeln_ignore!(output, " {}: {} {}", idx, kind, desc.index); - } - writeln_ignore!(output); - } - - if func.bytecode.is_empty() { - writeln_ignore!(output, " .code"); - writeln_ignore!(output, " ; (empty)"); 
- return; - } - - // two-pass: collect jump targets first, then disasm with labels - let labels = self.collect_jump_targets(func.bytecode.as_slice()); - - writeln_ignore!(output, " .code"); - let mut skip_cache_words = 0usize; - for (offset, &instr) in func.bytecode.iter().enumerate() { - // Skip cache words (they follow CallGlobal, CallGlobalMono, CallGlobalNative) - if skip_cache_words > 0 { - skip_cache_words -= 1; - continue; - } - - // Emit label if this is a jump target - if let Some(label) = labels.get(&offset) { - writeln_ignore!(output, " {}:", label); - } - - let disasm = self.disassemble_instruction(instr, offset, &labels); - - // Check if this instruction has cache words following it - let opcode = OpCode::from_u8((instr >> 24) as u8); - if let Some(OpCode::CallGlobal | OpCode::CallGlobalMono | OpCode::CallGlobalNative) = - opcode - { - skip_cache_words = 2; // Skip the 2 cache words - } - - if self.options.include_line_info { - let line = func.get_line(offset); - if line > 0 { - writeln_ignore!(output, " {:04}: {:30} ; line {}", offset, disasm, line); - } else { - writeln_ignore!(output, " {:04}: {}", offset, disasm); - } - } else { - writeln_ignore!(output, " {:04}: {}", offset, disasm); - } - } - } - - fn collect_jump_targets(&self, bytecode: &[u32]) -> HashMap { - let mut targets = HashSet::new(); - - for (offset, &instr) in bytecode.iter().enumerate() { - let opcode = OpCode::from_u8((instr >> 24) as u8); - - if let Some(OpCode::Jump | OpCode::JumpIf | OpCode::JumpIfNot) = opcode { - let (_, _, imm) = decode_b(instr); - let target = if imm >= 0 { - offset.wrapping_add(1).wrapping_add(imm as usize) - } else { - offset.wrapping_add(1).wrapping_sub((-imm) as usize) - }; - targets.insert(target); - } - } - - let mut sorted_targets: Vec<_> = targets.into_iter().collect(); - sorted_targets.sort(); - - let mut labels = HashMap::new(); - for (idx, target) in sorted_targets.into_iter().enumerate() { - labels.insert(target, format!("L{}", idx)); - } - - 
labels - } - - fn disassemble_instruction( - &self, - instr: u32, - offset: usize, - labels: &HashMap, - ) -> String { - let opcode = match OpCode::from_u8((instr >> 24) as u8) { - Some(op) => op, - None => return format!(".word 0x{:08x}", instr), - }; - - match opcode { - // Format A: 3 registers - OpCode::Move => { - let (_, a, b, _) = decode_a(instr); - format!("Move r{}, r{}", a, b) - } - OpCode::LoadNull => { - let (_, a, _, _) = decode_a(instr); - format!("LoadNull r{}", a) - } - OpCode::LoadBool => { - let (_, a, b, _) = decode_a(instr); - format!("LoadBool r{}, {}", a, b != 0) - } - OpCode::Add => { - let (_, a, b, c) = decode_a(instr); - format!("Add r{}, r{}, r{}", a, b, c) - } - OpCode::Sub => { - let (_, a, b, c) = decode_a(instr); - format!("Sub r{}, r{}, r{}", a, b, c) - } - OpCode::Mul => { - let (_, a, b, c) = decode_a(instr); - format!("Mul r{}, r{}, r{}", a, b, c) - } - OpCode::Div => { - let (_, a, b, c) = decode_a(instr); - format!("Div r{}, r{}, r{}", a, b, c) - } - OpCode::Mod => { - let (_, a, b, c) = decode_a(instr); - format!("Mod r{}, r{}, r{}", a, b, c) - } - OpCode::Neg => { - let (_, a, b, _) = decode_a(instr); - format!("Neg r{}, r{}", a, b) - } - OpCode::Eq => { - let (_, a, b, c) = decode_a(instr); - format!("Eq r{}, r{}, r{}", a, b, c) - } - OpCode::Ne => { - let (_, a, b, c) = decode_a(instr); - format!("Ne r{}, r{}, r{}", a, b, c) - } - OpCode::Lt => { - let (_, a, b, c) = decode_a(instr); - format!("Lt r{}, r{}, r{}", a, b, c) - } - OpCode::Le => { - let (_, a, b, c) = decode_a(instr); - format!("Le r{}, r{}, r{}", a, b, c) - } - OpCode::Gt => { - let (_, a, b, c) = decode_a(instr); - format!("Gt r{}, r{}, r{}", a, b, c) - } - OpCode::Ge => { - let (_, a, b, c) = decode_a(instr); - format!("Ge r{}, r{}, r{}", a, b, c) - } - OpCode::Not => { - let (_, a, b, _) = decode_a(instr); - format!("Not r{}, r{}", a, b) - } - OpCode::Call => { - let (_, dest, func, nargs) = decode_a(instr); - format!("Call r{}, r{}, {}", dest, func, nargs) 
- } - OpCode::Return => { - let (_, a, _, _) = decode_a(instr); - format!("Return r{}", a) - } - OpCode::Return0 => "Return0".to_string(), - OpCode::GetGlobal => { - let (_, a, k, _) = decode_a(instr); - format!("GetGlobal r{}, {}", a, k) - } - OpCode::SetGlobal => { - let (_, a, k, _) = decode_a(instr); - format!("SetGlobal r{}, {}", a, k) - } - OpCode::IncGlobalI => { - let (_, a, k, b) = decode_c(instr); - format!("IncGlobalI r{}, {}, {}", a, k, b) - } - OpCode::EnterNoGc => "EnterNoGc".to_string(), - OpCode::ExitNoGc => "ExitNoGc".to_string(), - OpCode::Alloc => { - let (_, a, b, _) = decode_a(instr); - format!("Alloc r{}, r{}", a, b) - } - OpCode::Free => { - let (_, a, _, _) = decode_a(instr); - format!("Free r{}", a) - } - OpCode::LoadMem => { - let (_, a, b, c) = decode_a(instr); - format!("LoadMem r{}, r{}, r{}", a, b, c) - } - OpCode::LoadMemI => { - let (_, a, b, c) = decode_a(instr); - format!("LoadMemI r{}, r{}, {}", a, b, c) - } - OpCode::StoreMem => { - let (_, a, b, c) = decode_a(instr); - format!("StoreMem r{}, r{}, r{}", a, b, c) - } - OpCode::StoreMemI => { - let (_, a, b, c) = decode_a(instr); - format!("StoreMemI r{}, {}, r{}", a, b, c) - } - OpCode::Print => { - let (_, a, _, _) = decode_a(instr); - format!("Print r{}", a) - } - - // Format B: register + immediate - OpCode::LoadI => { - let (_, a, imm) = decode_b(instr); - format!("LoadI r{}, {}", a, imm) - } - OpCode::LoadK => { - let (_, a, k) = decode_b(instr); - format!("LoadK r{}, {}", a, k) - } - OpCode::Jump => { - let (_, _, imm) = decode_b(instr); - let target = if imm >= 0 { - offset.wrapping_add(1).wrapping_add(imm as usize) - } else { - offset.wrapping_add(1).wrapping_sub((-imm) as usize) - }; - if let Some(label) = labels.get(&target) { - format!("Jump {}", label) - } else { - format!("Jump @{}", target) - } - } - OpCode::JumpIf => { - let (_, a, imm) = decode_b(instr); - let target = if imm >= 0 { - offset.wrapping_add(1).wrapping_add(imm as usize) - } else { - 
offset.wrapping_add(1).wrapping_sub((-imm) as usize) - }; - if let Some(label) = labels.get(&target) { - format!("JumpIf r{}, {}", a, label) - } else { - format!("JumpIf r{}, @{}", a, target) - } - } - OpCode::JumpIfNot => { - let (_, a, imm) = decode_b(instr); - let target = if imm >= 0 { - offset.wrapping_add(1).wrapping_add(imm as usize) - } else { - offset.wrapping_add(1).wrapping_sub((-imm) as usize) - }; - if let Some(label) = labels.get(&target) { - format!("JumpIfNot r{}, {}", a, label) - } else { - format!("JumpIfNot r{}, @{}", a, target) - } - } - - // Closure opcodes - OpCode::MakeClosure => { - let (_, a, k, upval_count) = decode_a(instr); - format!("MakeClosure r{}, k{}, {}", a, k, upval_count) - } - OpCode::GetUpval => { - let (_, a, upval_idx, _) = decode_a(instr); - format!("GetUpval r{}, upval[{}]", a, upval_idx) - } - OpCode::SetUpval => { - let (_, upval_idx, src, _) = decode_a(instr); - format!("SetUpval upval[{}], r{}", upval_idx, src) - } - OpCode::CloseUpvals => { - let (_, from_reg, _, _) = decode_a(instr); - format!("CloseUpvals r{}", from_reg) - } - OpCode::ForLoopI => { - let (_, a, offset) = decode_b(instr); - format!("ForLoopI r{}, {}", a, offset) - } - OpCode::ForLoopIInc => { - let (_, a, offset) = decode_b(instr); - format!("ForLoopIInc r{}, {}", a, offset) - } - // Immediate arithmetic - OpCode::AddI => { - let (_, a, b, c) = decode_a(instr); - format!("AddI r{}, r{}, {}", a, b, c) - } - OpCode::SubI => { - let (_, a, b, c) = decode_a(instr); - format!("SubI r{}, r{}, {}", a, b, c) - } - // Immediate comparison - OpCode::LtImm => { - let (_, a, imm) = decode_b(instr); - format!("LtImm r{}, {}", a, imm) - } - OpCode::LeImm => { - let (_, a, imm) = decode_b(instr); - format!("LeImm r{}, {}", a, imm) - } - OpCode::GtImm => { - let (_, a, imm) = decode_b(instr); - format!("GtImm r{}, {}", a, imm) - } - OpCode::GeImm => { - let (_, a, imm) = decode_b(instr); - format!("GeImm r{}, {}", a, imm) - } - // While loop superinstruction - 
OpCode::WhileLoopLt => { - let (_, a, offset) = decode_b(instr); - format!("WhileLoopLt r{}, {}", a, offset) - } - // Type-specialized integer arithmetic - OpCode::AddII => { - let (_, a, b, c) = decode_a(instr); - format!("AddII r{}, r{}, r{}", a, b, c) - } - OpCode::SubII => { - let (_, a, b, c) = decode_a(instr); - format!("SubII r{}, r{}, r{}", a, b, c) - } - OpCode::MulII => { - let (_, a, b, c) = decode_a(instr); - format!("MulII r{}, r{}, r{}", a, b, c) - } - OpCode::DivII => { - let (_, a, b, c) = decode_a(instr); - format!("DivII r{}, r{}, r{}", a, b, c) - } - OpCode::ModII => { - let (_, a, b, c) = decode_a(instr); - format!("ModII r{}, r{}, r{}", a, b, c) - } - // Type-specialized float arithmetic - OpCode::AddFF => { - let (_, a, b, c) = decode_a(instr); - format!("AddFF r{}, r{}, r{}", a, b, c) - } - OpCode::SubFF => { - let (_, a, b, c) = decode_a(instr); - format!("SubFF r{}, r{}, r{}", a, b, c) - } - OpCode::MulFF => { - let (_, a, b, c) = decode_a(instr); - format!("MulFF r{}, r{}, r{}", a, b, c) - } - OpCode::DivFF => { - let (_, a, b, c) = decode_a(instr); - format!("DivFF r{}, r{}, r{}", a, b, c) - } - OpCode::ModFF => { - let (_, a, b, c) = decode_a(instr); - format!("ModFF r{}, r{}, r{}", a, b, c) - } - // Type-specialized integer comparisons - OpCode::LtII => { - let (_, a, b, c) = decode_a(instr); - format!("LtII r{}, r{}, r{}", a, b, c) - } - OpCode::LeII => { - let (_, a, b, c) = decode_a(instr); - format!("LeII r{}, r{}, r{}", a, b, c) - } - OpCode::GtII => { - let (_, a, b, c) = decode_a(instr); - format!("GtII r{}, r{}, r{}", a, b, c) - } - OpCode::GeII => { - let (_, a, b, c) = decode_a(instr); - format!("GeII r{}, r{}, r{}", a, b, c) - } - OpCode::EqII => { - let (_, a, b, c) = decode_a(instr); - format!("EqII r{}, r{}, r{}", a, b, c) - } - OpCode::NeII => { - let (_, a, b, c) = decode_a(instr); - format!("NeII r{}, r{}, r{}", a, b, c) - } - // Type-specialized float comparisons - OpCode::LtFF => { - let (_, a, b, c) = 
decode_a(instr); - format!("LtFF r{}, r{}, r{}", a, b, c) - } - OpCode::LeFF => { - let (_, a, b, c) = decode_a(instr); - format!("LeFF r{}, r{}, r{}", a, b, c) - } - OpCode::GtFF => { - let (_, a, b, c) = decode_a(instr); - format!("GtFF r{}, r{}, r{}", a, b, c) - } - OpCode::GeFF => { - let (_, a, b, c) = decode_a(instr); - format!("GeFF r{}, r{}, r{}", a, b, c) - } - OpCode::EqFF => { - let (_, a, b, c) = decode_a(instr); - format!("EqFF r{}, r{}, r{}", a, b, c) - } - OpCode::NeFF => { - let (_, a, b, c) = decode_a(instr); - format!("NeFF r{}, r{}, r{}", a, b, c) - } - // Integer comparison with immediate - OpCode::LtIImm => { - let (_, a, b, c) = decode_a(instr); - format!("LtIImm r{}, r{}, {}", a, b, c) - } - OpCode::LeIImm => { - let (_, a, b, c) = decode_a(instr); - format!("LeIImm r{}, r{}, {}", a, b, c) - } - OpCode::GtIImm => { - let (_, a, b, c) = decode_a(instr); - format!("GtIImm r{}, r{}, {}", a, b, c) - } - OpCode::GeIImm => { - let (_, a, b, c) = decode_a(instr); - format!("GeIImm r{}, r{}, {}", a, b, c) - } - OpCode::GetGlobalIdx => { - let (_, a, imm) = decode_b(instr); - match self.global_name(imm as usize) { - Some(name) => format!("GetGlobalIdx r{}, {} ; {}", a, imm, name), - None => format!("GetGlobalIdx r{}, {}", a, imm), - } - } - OpCode::SetGlobalIdx => { - let (_, a, imm) = decode_b(instr); - match self.global_name(imm as usize) { - Some(name) => format!("SetGlobalIdx {}, r{} ; {}", imm, a, name), - None => format!("SetGlobalIdx {}, r{}", imm, a), - } - } - OpCode::CallCached => { - let (_, a, b, c) = decode_a(instr); - format!("CallCached r{}, r{}, {}", a, b, c) - } - OpCode::CallGlobal => { - let (_, dest, global_idx, nargs) = decode_a(instr); - match self.global_name(global_idx as usize) { - Some(name) => format!( - "CallGlobal r{}, {}, {} ; {}()", - dest, global_idx, nargs, name - ), - None => format!("CallGlobal r{}, {}, {}", dest, global_idx, nargs), - } - } - OpCode::CallGlobalMono => { - let (_, dest, global_idx, nargs) = 
decode_a(instr); - match self.global_name(global_idx as usize) { - Some(name) => format!( - "CallGlobalMono r{}, {}, {} ; {}()", - dest, global_idx, nargs, name - ), - None => format!("CallGlobalMono r{}, {}, {}", dest, global_idx, nargs), - } - } - OpCode::CallGlobalNative => { - let (_, dest, global_idx, nargs) = decode_a(instr); - match self.global_name(global_idx as usize) { - Some(name) => format!( - "CallGlobalNative r{}, {}, {} ; {}()", - dest, global_idx, nargs, name - ), - None => format!("CallGlobalNative r{}, {}, {}", dest, global_idx, nargs), - } - } - // CallUpval - combined GetUpval + Call (for recursive closures) - OpCode::CallUpval => { - let (_, dest, upval_idx, nargs) = decode_a(instr); - format!("CallUpval r{}, upval[{}], {}", dest, upval_idx, nargs) - } - // TailCallUpval - tail call via upvalue (reuses stack frame) - OpCode::TailCallUpval => { - let (_, dest, upval_idx, nargs) = decode_a(instr); - format!("TailCallUpval r{}, upval[{}], {}", dest, upval_idx, nargs) - } - - // Guarded integer arithmetic - OpCode::AddIIG => { - let (_, a, b, c) = decode_a(instr); - format!("AddIIG r{}, r{}, r{}", a, b, c) - } - OpCode::SubIIG => { - let (_, a, b, c) = decode_a(instr); - format!("SubIIG r{}, r{}, r{}", a, b, c) - } - OpCode::MulIIG => { - let (_, a, b, c) = decode_a(instr); - format!("MulIIG r{}, r{}, r{}", a, b, c) - } - OpCode::DivIIG => { - let (_, a, b, c) = decode_a(instr); - format!("DivIIG r{}, r{}, r{}", a, b, c) - } - OpCode::ModIIG => { - let (_, a, b, c) = decode_a(instr); - format!("ModIIG r{}, r{}, r{}", a, b, c) - } - - // Guarded float arithmetic - OpCode::AddFFG => { - let (_, a, b, c) = decode_a(instr); - format!("AddFFG r{}, r{}, r{}", a, b, c) - } - OpCode::SubFFG => { - let (_, a, b, c) = decode_a(instr); - format!("SubFFG r{}, r{}, r{}", a, b, c) - } - OpCode::MulFFG => { - let (_, a, b, c) = decode_a(instr); - format!("MulFFG r{}, r{}, r{}", a, b, c) - } - OpCode::DivFFG => { - let (_, a, b, c) = decode_a(instr); - 
format!("DivFFG r{}, r{}, r{}", a, b, c) - } - OpCode::ModFFG => { - let (_, a, b, c) = decode_a(instr); - format!("ModFFG r{}, r{}, r{}", a, b, c) - } - - // Guarded integer comparisons - OpCode::LtIIG => { - let (_, a, b, c) = decode_a(instr); - format!("LtIIG r{}, r{}, r{}", a, b, c) - } - OpCode::LeIIG => { - let (_, a, b, c) = decode_a(instr); - format!("LeIIG r{}, r{}, r{}", a, b, c) - } - OpCode::GtIIG => { - let (_, a, b, c) = decode_a(instr); - format!("GtIIG r{}, r{}, r{}", a, b, c) - } - OpCode::GeIIG => { - let (_, a, b, c) = decode_a(instr); - format!("GeIIG r{}, r{}, r{}", a, b, c) - } - OpCode::EqIIG => { - let (_, a, b, c) = decode_a(instr); - format!("EqIIG r{}, r{}, r{}", a, b, c) - } - OpCode::NeIIG => { - let (_, a, b, c) = decode_a(instr); - format!("NeIIG r{}, r{}, r{}", a, b, c) - } - - // Guarded float comparisons - OpCode::LtFFG => { - let (_, a, b, c) = decode_a(instr); - format!("LtFFG r{}, r{}, r{}", a, b, c) - } - OpCode::LeFFG => { - let (_, a, b, c) = decode_a(instr); - format!("LeFFG r{}, r{}, r{}", a, b, c) - } - OpCode::GtFFG => { - let (_, a, b, c) = decode_a(instr); - format!("GtFFG r{}, r{}, r{}", a, b, c) - } - OpCode::GeFFG => { - let (_, a, b, c) = decode_a(instr); - format!("GeFFG r{}, r{}, r{}", a, b, c) - } - OpCode::EqFFG => { - let (_, a, b, c) = decode_a(instr); - format!("EqFFG r{}, r{}, r{}", a, b, c) - } - OpCode::NeFFG => { - let (_, a, b, c) = decode_a(instr); - format!("NeFFG r{}, r{}, r{}", a, b, c) - } - - // Generic bitwise operations - OpCode::Shl => { - let (_, a, b, c) = decode_a(instr); - format!("Shl r{}, r{}, r{}", a, b, c) - } - OpCode::Shr => { - let (_, a, b, c) = decode_a(instr); - format!("Shr r{}, r{}, r{}", a, b, c) - } - OpCode::BitAnd => { - let (_, a, b, c) = decode_a(instr); - format!("BitAnd r{}, r{}, r{}", a, b, c) - } - OpCode::BitOr => { - let (_, a, b, c) = decode_a(instr); - format!("BitOr r{}, r{}, r{}", a, b, c) - } - OpCode::BitXor => { - let (_, a, b, c) = decode_a(instr); - 
format!("BitXor r{}, r{}, r{}", a, b, c) - } - OpCode::BitNot => { - let (_, a, b, _) = decode_a(instr); - format!("BitNot r{}, r{}", a, b) - } - - // Type-specialized integer bitwise - OpCode::ShlII => { - let (_, a, b, c) = decode_a(instr); - format!("ShlII r{}, r{}, r{}", a, b, c) - } - OpCode::ShrII => { - let (_, a, b, c) = decode_a(instr); - format!("ShrII r{}, r{}, r{}", a, b, c) - } - OpCode::AndII => { - let (_, a, b, c) = decode_a(instr); - format!("AndII r{}, r{}, r{}", a, b, c) - } - OpCode::OrII => { - let (_, a, b, c) = decode_a(instr); - format!("OrII r{}, r{}, r{}", a, b, c) - } - OpCode::XorII => { - let (_, a, b, c) = decode_a(instr); - format!("XorII r{}, r{}, r{}", a, b, c) - } - OpCode::NotI => { - let (_, a, b, _) = decode_a(instr); - format!("NotI r{}, r{}", a, b) - } - - // Bitwise with immediate - OpCode::ShlIImm => { - let (_, a, b, c) = decode_a(instr); - format!("ShlIImm r{}, r{}, {}", a, b, c) - } - OpCode::ShrIImm => { - let (_, a, b, c) = decode_a(instr); - format!("ShrIImm r{}, r{}, {}", a, b, c) - } - OpCode::AndIImm => { - let (_, a, b, c) = decode_a(instr); - format!("AndIImm r{}, r{}, {}", a, b, c) - } - OpCode::OrIImm => { - let (_, a, b, c) = decode_a(instr); - format!("OrIImm r{}, r{}, {}", a, b, c) - } - OpCode::XorIImm => { - let (_, a, b, c) = decode_a(instr); - format!("XorIImm r{}, r{}, {}", a, b, c) - } - - // Array operations - OpCode::ArrayNewI => { - let (_, a, b, _) = decode_a(instr); - format!("ArrayNewI r{}, r{}", a, b) - } - OpCode::ArrayNewF => { - let (_, a, b, _) = decode_a(instr); - format!("ArrayNewF r{}, r{}", a, b) - } - OpCode::ArrayNewB => { - let (_, a, b, _) = decode_a(instr); - format!("ArrayNewB r{}, r{}", a, b) - } - OpCode::ArrayNewP => { - let (_, a, b, _) = decode_a(instr); - format!("ArrayNewP r{}, r{}", a, b) - } - OpCode::ArrayLit => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayLit r{}, r{}, {}", a, b, c) - } - OpCode::ArrayLoadI => { - let (_, a, b, c) = decode_a(instr); - 
format!("ArrayLoadI r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayLoadF => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayLoadF r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayLoadB => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayLoadB r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayLoadP => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayLoadP r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayGetI => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayGetI r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayGetF => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayGetF r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayGetB => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayGetB r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayGetP => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayGetP r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayStoreI => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayStoreI r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayStoreF => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayStoreF r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayStoreB => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayStoreB r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayStoreP => { - let (_, a, b, c) = decode_a(instr); - format!("ArrayStoreP r{}, r{}, r{}", a, b, c) - } - OpCode::ArrayLen => { - let (_, a, b, _) = decode_a(instr); - format!("ArrayLen r{}, r{}", a, b) - } - - // Vec operations - OpCode::VecNewI => { - let (_, a, b, _) = decode_a(instr); - format!("VecNewI r{}, r{}", a, b) - } - OpCode::VecNewF => { - let (_, a, b, _) = decode_a(instr); - format!("VecNewF r{}, r{}", a, b) - } - OpCode::VecNewB => { - let (_, a, b, _) = decode_a(instr); - format!("VecNewB r{}, r{}", a, b) - } - OpCode::VecNewP => { - let (_, a, b, _) = decode_a(instr); - format!("VecNewP r{}, r{}", a, b) - } - OpCode::VecLit => { - let (_, a, b, c) = decode_a(instr); - format!("VecLit r{}, r{}, {}", a, b, c) - } - OpCode::VecPushI => { - let (_, a, b, _) = 
decode_a(instr); - format!("VecPushI r{}, r{}", a, b) - } - OpCode::VecPushF => { - let (_, a, b, _) = decode_a(instr); - format!("VecPushF r{}, r{}", a, b) - } - OpCode::VecPushB => { - let (_, a, b, _) = decode_a(instr); - format!("VecPushB r{}, r{}", a, b) - } - OpCode::VecPushP => { - let (_, a, b, _) = decode_a(instr); - format!("VecPushP r{}, r{}", a, b) - } - OpCode::VecPopI => { - let (_, a, b, _) = decode_a(instr); - format!("VecPopI r{}, r{}", a, b) - } - OpCode::VecPopF => { - let (_, a, b, _) = decode_a(instr); - format!("VecPopF r{}, r{}", a, b) - } - OpCode::VecPopB => { - let (_, a, b, _) = decode_a(instr); - format!("VecPopB r{}, r{}", a, b) - } - OpCode::VecPopP => { - let (_, a, b, _) = decode_a(instr); - format!("VecPopP r{}, r{}", a, b) - } - OpCode::VecLen => { - let (_, a, b, _) = decode_a(instr); - format!("VecLen r{}, r{}", a, b) - } - OpCode::VecCap => { - let (_, a, b, _) = decode_a(instr); - format!("VecCap r{}, r{}", a, b) - } - OpCode::VecReserve => { - let (_, a, b, _) = decode_a(instr); - format!("VecReserve r{}, r{}", a, b) - } - OpCode::VecLoadI => { - let (_, a, b, c) = decode_a(instr); - format!("VecLoadI r{}, r{}, r{}", a, b, c) - } - OpCode::VecLoadF => { - let (_, a, b, c) = decode_a(instr); - format!("VecLoadF r{}, r{}, r{}", a, b, c) - } - OpCode::VecLoadB => { - let (_, a, b, c) = decode_a(instr); - format!("VecLoadB r{}, r{}, r{}", a, b, c) - } - OpCode::VecLoadP => { - let (_, a, b, c) = decode_a(instr); - format!("VecLoadP r{}, r{}, r{}", a, b, c) - } - OpCode::VecGetI => { - let (_, a, b, c) = decode_a(instr); - format!("VecGetI r{}, r{}, r{}", a, b, c) - } - OpCode::VecGetF => { - let (_, a, b, c) = decode_a(instr); - format!("VecGetF r{}, r{}, r{}", a, b, c) - } - OpCode::VecGetB => { - let (_, a, b, c) = decode_a(instr); - format!("VecGetB r{}, r{}, r{}", a, b, c) - } - OpCode::VecGetP => { - let (_, a, b, c) = decode_a(instr); - format!("VecGetP r{}, r{}, r{}", a, b, c) - } - OpCode::VecStoreI => { - let (_, a, b, c) 
= decode_a(instr); - format!("VecStoreI r{}, r{}, r{}", a, b, c) - } - OpCode::VecStoreF => { - let (_, a, b, c) = decode_a(instr); - format!("VecStoreF r{}, r{}, r{}", a, b, c) - } - OpCode::VecStoreB => { - let (_, a, b, c) = decode_a(instr); - format!("VecStoreB r{}, r{}, r{}", a, b, c) - } - OpCode::VecStoreP => { - let (_, a, b, c) = decode_a(instr); - format!("VecStoreP r{}, r{}, r{}", a, b, c) - } - OpCode::StringLoadChar => { - let (_, a, b, c) = decode_a(instr); - format!("StringLoadChar r{}, r{}, r{}", a, b, c) - } - OpCode::StringForLoop => { - let (_, a, imm) = decode_b(instr); - format!("StringForLoop r{}, {}", a, imm) - } - OpCode::VecForLoop => { - let (_, a, imm) = decode_b(instr); - format!("VecForLoop r{}, {}", a, imm) - } - OpCode::ArrayForLoop => { - let (_, a, imm) = decode_b(instr); - format!("ArrayForLoop r{}, {}", a, imm) - } - } - } - - fn format_constant(&self, value: &Value, nested_functions: &[Function]) -> String { - if value.is_null() { - "null".to_string() - } else if let Some(b) = value.as_bool() { - format!("bool {}", b) - } else if let Some(n) = value.as_int() { - format!("int {}", n) - } else if let Some(f) = value.as_float() { - if f.is_nan() { - "float nan".to_string() - } else if f.is_infinite() { - if f.is_sign_positive() { - "float inf".to_string() - } else { - "float -inf".to_string() - } - } else { - format!("float {}", f) - } - } else if let Some(func_idx) = value.as_nested_fn_marker() { - let name = nested_functions - .get(func_idx) - .and_then(|f| f.name.as_deref()) - .or_else(|| self.nested_fn_name(func_idx)); - match name { - Some(n) => format!("func @{} \"{}\"", func_idx + 1, escape_string(n)), - None => format!("func @{}", func_idx + 1), - } - } else if let Some(ptr) = value.as_ptr() { - if let Some(heap) = self.heap { - if let Some(obj) = heap.get(GcRef::new(ptr)) { - match &obj.kind { - ObjectKind::String(s) => { - format!("string \"{}\"", escape_string(s.as_str())) - } - ObjectKind::Function(f) => { - if let 
Some(name) = f.name() { - format!("func \"{}\"", escape_string(name)) - } else { - "func ".to_string() - } - } - ObjectKind::Native(n) => { - format!("native \"{}\"", escape_string(&n.name)) - } - ObjectKind::Upvalue(_) => "upvalue".to_string(), - ObjectKind::Closure(_) => "closure".to_string(), - ObjectKind::Array(a) => format!("array[{}]", a.len()), - ObjectKind::Vec(v) => format!("vec[{}]", v.len()), - } - } else { - format!("ptr {}", ptr) - } - } else { - format!("ptr {}", ptr) - } - } else { - format!("unknown 0x{:016x}", value.as_int().unwrap_or(0) as u64) - } - } -} - -pub fn escape_string(s: &str) -> String { - let mut result = String::with_capacity(s.len()); - for c in s.chars() { - match c { - '\n' => result.push_str("\\n"), - '\r' => result.push_str("\\r"), - '\t' => result.push_str("\\t"), - '\\' => result.push_str("\\\\"), - '"' => result.push_str("\\\""), - '\0' => result.push_str("\\0"), - c if c.is_control() => { - // Use \xNN for other control characters - for byte in c.to_string().bytes() { - result.push_str(&format!("\\x{:02x}", byte)); - } - } - c => result.push(c), - } - } - result -} diff --git a/bytecode/src/asm/lexer.rs b/bytecode/src/asm/lexer.rs deleted file mode 100644 index 13f30cb..0000000 --- a/bytecode/src/asm/lexer.rs +++ /dev/null @@ -1,306 +0,0 @@ -//! 
Lexer for .aasm files - -use super::AssemblerError; - -/// Result type for lexer operations -pub(super) type Result = std::result::Result; - -/// Token types for the assembler lexer -#[derive(Debug, Clone, PartialEq)] -pub(super) enum Token { - /// A directive like .function, .name, .code - Directive(String), - /// A label reference like L0 - LabelRef(String), - /// An identifier/opcode like LoadI, Move - Ident(String), - /// A register like r0, r1 - Register(u8), - /// An integer literal - Int(i64), - /// A float literal - Float(f64), - /// A string literal (already unescaped) - String(String), - /// A boolean literal - Bool(bool), - /// Null literal - Null, - /// Comma separator - Comma, - /// Colon - Colon, - /// @ symbol (for absolute addresses) - At, - /// [ left bracket - LBracket, - /// ] right bracket - RBracket, - /// End of line - Newline, - /// End of file - Eof, -} - -/// Lexer for .aasm files -pub(super) struct Lexer<'a> { - chars: std::iter::Peekable>, - line: usize, - _marker: std::marker::PhantomData<&'a str>, -} - -impl<'a> Lexer<'a> { - pub(super) fn new(source: &'a str) -> Self { - Self { - chars: source.char_indices().peekable(), - line: 1, - _marker: std::marker::PhantomData, - } - } - - pub(super) fn current_line(&self) -> usize { - self.line - } - - pub(super) fn next_token(&mut self) -> Result { - self.skip_whitespace_and_comments(); - - let Some(&(_, c)) = self.chars.peek() else { - return Ok(Token::Eof); - }; - - match c { - '\n' => { - self.chars.next(); - self.line += 1; - Ok(Token::Newline) - } - ',' => { - self.chars.next(); - Ok(Token::Comma) - } - ':' => { - self.chars.next(); - Ok(Token::Colon) - } - '@' => { - self.chars.next(); - Ok(Token::At) - } - '[' => { - self.chars.next(); - Ok(Token::LBracket) - } - ']' => { - self.chars.next(); - Ok(Token::RBracket) - } - '.' 
=> { - self.chars.next(); - let name = self.read_identifier(); - Ok(Token::Directive(name)) - } - '"' => { - self.chars.next(); - let s = self.read_string()?; - Ok(Token::String(s)) - } - 'r' if self.peek_is_digit(1) => { - self.chars.next(); - let num = self.read_number()?; - if let Token::Int(n) = num { - if (0..=255).contains(&n) { - Ok(Token::Register(n as u8)) - } else { - Err(AssemblerError::InvalidRegister(format!("r{}", n))) - } - } else { - Err(AssemblerError::InvalidRegister( - "register must be integer".to_string(), - )) - } - } - c if c.is_ascii_alphabetic() || c == '_' => { - let name = self.read_identifier(); - match name.as_str() { - "true" => Ok(Token::Bool(true)), - "false" => Ok(Token::Bool(false)), - "null" => Ok(Token::Null), - _ => { - // Labels are L followed by digits or underscore only (L0, L1, L_else) - // But not opcodes like LoadI, LoadK - if name.starts_with('L') && name.len() > 1 { - let rest = &name[1..]; - // If the rest is all digits or starts with underscore, it's a label - if rest - .chars() - .next() - .map(|c| c.is_ascii_digit() || c == '_') - .unwrap_or(false) - { - return Ok(Token::LabelRef(name)); - } - } - Ok(Token::Ident(name)) - } - } - } - c if c.is_ascii_digit() || c == '-' => self.read_number(), - _ => Err(AssemblerError::ParseError { - line: self.line, - message: format!("Unexpected character: '{}'", c), - }), - } - } - - fn skip_whitespace_and_comments(&mut self) { - loop { - match self.chars.peek() { - Some(&(_, ' ')) | Some(&(_, '\t')) | Some(&(_, '\r')) => { - self.chars.next(); - } - Some(&(_, ';')) => { - while let Some(&(_, c)) = self.chars.peek() { - if c == '\n' { - break; - } - self.chars.next(); - } - } - _ => break, - } - } - } - - /// Peek ahead to see if the next character is a digit - fn peek_is_digit(&self, _offset: usize) -> bool { - let mut chars = self.chars.clone(); - chars.next(); - chars - .peek() - .map(|&(_, c)| c.is_ascii_digit()) - .unwrap_or(false) - } - - fn read_identifier(&mut self) -> 
String { - let mut name = String::new(); - while let Some(&(_, c)) = self.chars.peek() { - if c.is_ascii_alphanumeric() || c == '_' { - name.push(c); - self.chars.next(); - } else { - break; - } - } - name - } - - fn read_number(&mut self) -> Result { - let mut num_str = String::new(); - let mut is_float = false; - - if let Some(&(_, '-')) = self.chars.peek() { - num_str.push('-'); - self.chars.next(); - } - - while let Some(&(_, c)) = self.chars.peek() { - if c.is_ascii_digit() { - num_str.push(c); - self.chars.next(); - } else if c == '.' && !is_float { - is_float = true; - num_str.push(c); - self.chars.next(); - } else if (c == 'e' || c == 'E') && !num_str.contains('e') && !num_str.contains('E') { - is_float = true; - num_str.push(c); - self.chars.next(); - if let Some(&(_, sign)) = self.chars.peek() - && (sign == '+' || sign == '-') - { - num_str.push(sign); - self.chars.next(); - } - } else { - break; - } - } - - if num_str == "-" { - let rest = self.read_identifier(); - if rest == "inf" { - return Ok(Token::Float(f64::NEG_INFINITY)); - } - return Err(AssemblerError::InvalidNumber(format!("-{}", rest))); - } - - if is_float { - num_str - .parse::() - .map(Token::Float) - .map_err(|_| AssemblerError::InvalidNumber(num_str)) - } else { - num_str - .parse::() - .map(Token::Int) - .map_err(|_| AssemblerError::InvalidNumber(num_str)) - } - } - - fn read_string(&mut self) -> Result { - let mut result = String::new(); - loop { - match self.chars.next() { - None => { - return Err(AssemblerError::InvalidString( - "Unterminated string".to_string(), - )); - } - Some((_, '"')) => break, - Some((_, '\\')) => match self.chars.next() { - None => { - return Err(AssemblerError::InvalidString( - "Unterminated escape".to_string(), - )); - } - Some((_, 'n')) => result.push('\n'), - Some((_, 'r')) => result.push('\r'), - Some((_, 't')) => result.push('\t'), - Some((_, '\\')) => result.push('\\'), - Some((_, '"')) => result.push('"'), - Some((_, '0')) => result.push('\0'), - 
Some((_, 'x')) => { - let mut hex = String::new(); - for _ in 0..2 { - match self.chars.next() { - Some((_, c)) if c.is_ascii_hexdigit() => hex.push(c), - _ => { - return Err(AssemblerError::InvalidString( - "Invalid hex escape".to_string(), - )); - } - } - } - let byte = u8::from_str_radix(&hex, 16).map_err(|_| { - AssemblerError::InvalidString("Invalid hex escape".to_string()) - })?; - result.push(byte as char); - } - Some((_, c)) => { - return Err(AssemblerError::InvalidString(format!( - "Unknown escape: \\{}", - c - ))); - } - }, - Some((_, '\n')) => { - self.line += 1; - result.push('\n'); - } - Some((_, c)) => result.push(c), - } - } - Ok(result) - } -} diff --git a/bytecode/src/asm/mod.rs b/bytecode/src/asm/mod.rs deleted file mode 100644 index b17ddc0..0000000 --- a/bytecode/src/asm/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -// .aasm assembly format - text representation of bytecode - -pub mod assembler; -pub mod binary; -pub mod disasm; -mod lexer; -mod opcodes; - -pub use assembler::{AssemblerError, assemble, assemble_from_string}; -pub use binary::{ - BinaryError, NativeBundle, deserialize, deserialize_with_manifest, serialize, - serialize_with_manifest, -}; -pub use disasm::{DisassemblerOptions, disassemble, disassemble_to_string}; diff --git a/bytecode/src/asm/opcodes.rs b/bytecode/src/asm/opcodes.rs deleted file mode 100644 index 2d300c9..0000000 --- a/bytecode/src/asm/opcodes.rs +++ /dev/null @@ -1,777 +0,0 @@ -//! Opcode parsing for the assembler - -use super::assembler::{AasmParser, AssemblerError, Result}; -use super::lexer::Token; -use crate::bytecode::OpCode; - -impl<'a> AasmParser<'a> { - pub(super) fn parse_instruction( - &mut self, - bytecode: &mut Vec, - label_refs: &mut Vec<(usize, String, bool)>, - ) -> Result<()> { - let opcode_name = match self.advance()? 
{ - Token::Ident(name) => name, - t => { - return Err(AssemblerError::Expected { - expected: "opcode".to_string(), - got: format!("{:?}", t), - }); - } - }; - - let mut extra_cache_words = 0usize; - let instr = match opcode_name.as_str() { - "Move" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::Move, a, b, 0) - } - "LoadI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let imm = self.parse_i16()?; - encode_b(OpCode::LoadI, a, imm) - } - "LoadK" => { - let a = self.parse_register()?; - self.skip_comma()?; - let k = self.parse_i16()?; - encode_b(OpCode::LoadK, a, k) - } - "LoadNull" => { - let a = self.parse_register()?; - encode_a(OpCode::LoadNull, a, 0, 0) - } - "LoadBool" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = match self.advance()? { - Token::Bool(b) => { - if b { - 1 - } else { - 0 - } - } - Token::Ident(s) if s == "true" => 1, - Token::Ident(s) if s == "false" => 0, - t => { - return Err(AssemblerError::Expected { - expected: "bool".to_string(), - got: format!("{:?}", t), - }); - } - }; - encode_a(OpCode::LoadBool, a, b, 0) - } - "Add" => self.parse_ternary_reg(OpCode::Add)?, - "Sub" => self.parse_ternary_reg(OpCode::Sub)?, - "Mul" => self.parse_ternary_reg(OpCode::Mul)?, - "Div" => self.parse_ternary_reg(OpCode::Div)?, - "Mod" => self.parse_ternary_reg(OpCode::Mod)?, - "Neg" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::Neg, a, b, 0) - } - "Eq" => self.parse_ternary_reg(OpCode::Eq)?, - "Ne" => self.parse_ternary_reg(OpCode::Ne)?, - "Lt" => self.parse_ternary_reg(OpCode::Lt)?, - "Le" => self.parse_ternary_reg(OpCode::Le)?, - "Gt" => self.parse_ternary_reg(OpCode::Gt)?, - "Ge" => self.parse_ternary_reg(OpCode::Ge)?, - "Not" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::Not, a, b, 0) - } - "Jump" => { - let (offset, 
label) = self.parse_jump_target()?; - let instr = encode_b(OpCode::Jump, 0, offset); - if let Some(lbl) = label { - label_refs.push((bytecode.len(), lbl, false)); - } - instr - } - "JumpIf" => { - let a = self.parse_register()?; - self.skip_comma()?; - let (offset, label) = self.parse_jump_target()?; - let instr = encode_b(OpCode::JumpIf, a, offset); - if let Some(lbl) = label { - label_refs.push((bytecode.len(), lbl, true)); - } - instr - } - "JumpIfNot" => { - let a = self.parse_register()?; - self.skip_comma()?; - let (offset, label) = self.parse_jump_target()?; - let instr = encode_b(OpCode::JumpIfNot, a, offset); - if let Some(lbl) = label { - label_refs.push((bytecode.len(), lbl, true)); - } - instr - } - "Call" => { - let dest = self.parse_register()?; - self.skip_comma()?; - let func = self.parse_register()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - encode_a(OpCode::Call, dest, func, nargs) - } - "Return" => { - let a = self.parse_register()?; - encode_a(OpCode::Return, a, 0, 0) - } - "Return0" => encode_a(OpCode::Return0, 0, 0, 0), - "GetGlobal" => { - let a = self.parse_register()?; - self.skip_comma()?; - let k = self.parse_u8()?; - encode_a(OpCode::GetGlobal, a, k, 0) - } - "SetGlobal" => { - let a = self.parse_register()?; - self.skip_comma()?; - let k = self.parse_u8()?; - encode_a(OpCode::SetGlobal, a, k, 0) - } - "GetGlobalIdx" => { - let a = self.parse_register()?; - self.skip_comma()?; - let idx = self.parse_i16()?; - encode_b(OpCode::GetGlobalIdx, a, idx) - } - "SetGlobalIdx" => { - let idx = self.parse_i16()?; - self.skip_comma()?; - let a = self.parse_register()?; - encode_b(OpCode::SetGlobalIdx, a, idx) - } - "IncGlobalI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let k = self.parse_u8()?; - self.skip_comma()?; - let b = self.parse_u8()?; - encode_a(OpCode::IncGlobalI, a, k, b) - } - "EnterNoGc" => encode_a(OpCode::EnterNoGc, 0, 0, 0), - "ExitNoGc" => encode_a(OpCode::ExitNoGc, 0, 0, 0), - "Alloc" => { - let 
a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::Alloc, a, b, 0) - } - "Free" => { - let a = self.parse_register()?; - encode_a(OpCode::Free, a, 0, 0) - } - "LoadMem" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::LoadMem, a, b, c) - } - "LoadMemI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::LoadMemI, a, b, c) - } - "StoreMem" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::StoreMem, a, b, c) - } - "StoreMemI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::StoreMemI, a, b, c) - } - "Print" => { - let a = self.parse_register()?; - encode_a(OpCode::Print, a, 0, 0) - } - "MakeClosure" => { - let a = self.parse_register()?; - self.skip_comma()?; - // Handle 'kN' format (e.g., k0, k1) where N is the constant index - let k = if let Token::Ident(s) = &self.current { - if let Some(num_str) = s.strip_prefix('k') { - let k = num_str.parse::().map_err(|_| { - AssemblerError::InvalidNumber(format!("Invalid constant index: {}", s)) - })?; - self.advance()?; - k - } else { - self.parse_u8()? - } - } else { - self.parse_u8()? 
- }; - self.skip_comma()?; - let upval_count = self.parse_u8()?; - encode_a(OpCode::MakeClosure, a, k, upval_count) - } - "GetUpval" => { - let a = self.parse_register()?; - self.skip_comma()?; - // Parse 'upval[N]' format - let idx = self.parse_upval_index()?; - encode_a(OpCode::GetUpval, a, idx, 0) - } - "SetUpval" => { - // Parse 'upval[N]' format - let idx = self.parse_upval_index()?; - self.skip_comma()?; - let src = self.parse_register()?; - encode_a(OpCode::SetUpval, idx, src, 0) - } - "CloseUpvals" => { - let a = self.parse_register()?; - encode_a(OpCode::CloseUpvals, a, 0, 0) - } - "ForLoopI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let offset = self.parse_i16()?; - // Skip any trailing comment (iter+=step; ...) - while self.current != Token::Newline && self.current != Token::Eof { - self.advance()?; - } - encode_b(OpCode::ForLoopI, a, offset) - } - "ForLoopIInc" => { - let a = self.parse_register()?; - self.skip_comma()?; - let offset = self.parse_i16()?; - // Skip any trailing comment - while self.current != Token::Newline && self.current != Token::Eof { - self.advance()?; - } - encode_b(OpCode::ForLoopIInc, a, offset) - } - // New immediate opcodes - "AddI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::AddI, a, b, c) - } - "SubI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::SubI, a, b, c) - } - "LtImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let imm = self.parse_i16()?; - encode_b(OpCode::LtImm, a, imm) - } - "LeImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let imm = self.parse_i16()?; - encode_b(OpCode::LeImm, a, imm) - } - "GtImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let imm = self.parse_i16()?; - encode_b(OpCode::GtImm, a, 
imm) - } - "GeImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let imm = self.parse_i16()?; - encode_b(OpCode::GeImm, a, imm) - } - "WhileLoopLt" => { - let a = self.parse_register()?; - self.skip_comma()?; - let offset = self.parse_i16()?; - // Skip any trailing comment - while self.current != Token::Newline && self.current != Token::Eof { - self.advance()?; - } - encode_b(OpCode::WhileLoopLt, a, offset) - } - // Type-specialized integer opcodes - "AddII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::AddII, a, b, c) - } - "SubII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::SubII, a, b, c) - } - "MulII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::MulII, a, b, c) - } - "DivII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::DivII, a, b, c) - } - "ModII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::ModII, a, b, c) - } - // Type-specialized float opcodes - "AddFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::AddFF, a, b, c) - } - "SubFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::SubFF, a, b, c) - } - "MulFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = 
self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::MulFF, a, b, c) - } - "DivFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::DivFF, a, b, c) - } - "ModFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::ModFF, a, b, c) - } - // Integer comparisons - "LtII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::LtII, a, b, c) - } - "LeII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::LeII, a, b, c) - } - "GtII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::GtII, a, b, c) - } - "GeII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::GeII, a, b, c) - } - "EqII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::EqII, a, b, c) - } - "NeII" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::NeII, a, b, c) - } - // Float comparisons - "LtFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::LtFF, a, b, c) - } - "LeFF" => { - let a = 
self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::LeFF, a, b, c) - } - "GtFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::GtFF, a, b, c) - } - "GeFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::GeFF, a, b, c) - } - "EqFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::EqFF, a, b, c) - } - "NeFF" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - encode_a(OpCode::NeFF, a, b, c) - } - // Integer immediate comparisons - "LtIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::LtIImm, a, b, c) - } - "LeIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::LeIImm, a, b, c) - } - "GtIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::GtIImm, a, b, c) - } - "GeIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::GeIImm, a, b, c) - } - "CallCached" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::CallCached, a, b, c) - } - 
"CallGlobal" => { - // Format: CallGlobal r, , - // Followed by 2 cache words (emitted separately) - let dest = self.parse_register()?; - self.skip_comma()?; - let global_idx = self.parse_u8()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - extra_cache_words = 2; - encode_a(OpCode::CallGlobal, dest, global_idx, nargs) - } - "CallGlobalMono" => { - // Format: CallGlobalMono r, , - let dest = self.parse_register()?; - self.skip_comma()?; - let global_idx = self.parse_u8()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - extra_cache_words = 2; - encode_a(OpCode::CallGlobalMono, dest, global_idx, nargs) - } - "CallGlobalNative" => { - // Format: CallGlobalNative r, , - let dest = self.parse_register()?; - self.skip_comma()?; - let global_idx = self.parse_u8()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - extra_cache_words = 2; - encode_a(OpCode::CallGlobalNative, dest, global_idx, nargs) - } - "CallUpval" => { - // Format: CallUpval r, upval[N], - let dest = self.parse_register()?; - self.skip_comma()?; - let upval_idx = self.parse_upval_index()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - encode_a(OpCode::CallUpval, dest, upval_idx, nargs) - } - "TailCallUpval" => { - // Format: TailCallUpval r, upval[N], - let dest = self.parse_register()?; - self.skip_comma()?; - let upval_idx = self.parse_upval_index()?; - self.skip_comma()?; - let nargs = self.parse_u8()?; - encode_a(OpCode::TailCallUpval, dest, upval_idx, nargs) - } - "AddIIG" => self.parse_ternary_reg(OpCode::AddIIG)?, - "SubIIG" => self.parse_ternary_reg(OpCode::SubIIG)?, - "MulIIG" => self.parse_ternary_reg(OpCode::MulIIG)?, - "DivIIG" => self.parse_ternary_reg(OpCode::DivIIG)?, - "ModIIG" => self.parse_ternary_reg(OpCode::ModIIG)?, - "AddFFG" => self.parse_ternary_reg(OpCode::AddFFG)?, - "SubFFG" => self.parse_ternary_reg(OpCode::SubFFG)?, - "MulFFG" => self.parse_ternary_reg(OpCode::MulFFG)?, - "DivFFG" => self.parse_ternary_reg(OpCode::DivFFG)?, - "ModFFG" => 
self.parse_ternary_reg(OpCode::ModFFG)?, - "LtIIG" => self.parse_ternary_reg(OpCode::LtIIG)?, - "LeIIG" => self.parse_ternary_reg(OpCode::LeIIG)?, - "GtIIG" => self.parse_ternary_reg(OpCode::GtIIG)?, - "GeIIG" => self.parse_ternary_reg(OpCode::GeIIG)?, - "EqIIG" => self.parse_ternary_reg(OpCode::EqIIG)?, - "NeIIG" => self.parse_ternary_reg(OpCode::NeIIG)?, - "LtFFG" => self.parse_ternary_reg(OpCode::LtFFG)?, - "LeFFG" => self.parse_ternary_reg(OpCode::LeFFG)?, - "GtFFG" => self.parse_ternary_reg(OpCode::GtFFG)?, - "GeFFG" => self.parse_ternary_reg(OpCode::GeFFG)?, - "EqFFG" => self.parse_ternary_reg(OpCode::EqFFG)?, - "NeFFG" => self.parse_ternary_reg(OpCode::NeFFG)?, - "Shl" => self.parse_ternary_reg(OpCode::Shl)?, - "Shr" => self.parse_ternary_reg(OpCode::Shr)?, - "BitAnd" => self.parse_ternary_reg(OpCode::BitAnd)?, - "BitOr" => self.parse_ternary_reg(OpCode::BitOr)?, - "BitXor" => self.parse_ternary_reg(OpCode::BitXor)?, - "BitNot" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::BitNot, a, b, 0) - } - "ShlII" => self.parse_ternary_reg(OpCode::ShlII)?, - "ShrII" => self.parse_ternary_reg(OpCode::ShrII)?, - "AndII" => self.parse_ternary_reg(OpCode::AndII)?, - "OrII" => self.parse_ternary_reg(OpCode::OrII)?, - "XorII" => self.parse_ternary_reg(OpCode::XorII)?, - "NotI" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - encode_a(OpCode::NotI, a, b, 0) - } - "ShlIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::ShlIImm, a, b, c) - } - "ShrIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::ShrIImm, a, b, c) - } - "AndIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - 
self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::AndIImm, a, b, c) - } - "OrIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::OrIImm, a, b, c) - } - "XorIImm" => { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_u8()?; - encode_a(OpCode::XorIImm, a, b, c) - } - _ => return Err(AssemblerError::UnknownOpcode(opcode_name)), - }; - - bytecode.push(instr); - if extra_cache_words > 0 { - bytecode.extend(std::iter::repeat_n(0, extra_cache_words)); - } - Ok(()) - } - - fn parse_ternary_reg(&mut self, op: OpCode) -> Result { - let a = self.parse_register()?; - self.skip_comma()?; - let b = self.parse_register()?; - self.skip_comma()?; - let c = self.parse_register()?; - Ok(encode_a(op, a, b, c)) - } - - fn parse_jump_target(&mut self) -> Result<(i16, Option)> { - match &self.current { - Token::LabelRef(name) => { - let label = name.clone(); - self.advance()?; - Ok((0, Some(label))) - } - Token::At => { - self.advance()?; - if let Token::Int(n) = self.advance()? 
{ - Ok((n as i16, None)) - } else { - Err(AssemblerError::Expected { - expected: "offset".to_string(), - got: format!("{:?}", self.current), - }) - } - } - Token::Int(n) => { - let offset = *n; - self.advance()?; - Ok((offset as i16, None)) - } - _ => Err(AssemblerError::Expected { - expected: "label or offset".to_string(), - got: format!("{:?}", self.current), - }), - } - } - - /// Parse 'upval[N]' format and return N - pub(super) fn parse_upval_index(&mut self) -> Result { - // Expect 'upval' identifier - if let Token::Ident(s) = &self.current { - if s != "upval" { - return Err(AssemblerError::Expected { - expected: "upval".to_string(), - got: format!("{:?}", self.current), - }); - } - self.advance()?; - } else { - return Err(AssemblerError::Expected { - expected: "upval".to_string(), - got: format!("{:?}", self.current), - }); - } - - // Expect '[' - if self.current != Token::LBracket { - return Err(AssemblerError::Expected { - expected: "[".to_string(), - got: format!("{:?}", self.current), - }); - } - self.advance()?; - - // Parse the index - let idx = self.parse_u8()?; - - // Expect ']' - if self.current != Token::RBracket { - return Err(AssemblerError::Expected { - expected: "]".to_string(), - got: format!("{:?}", self.current), - }); - } - self.advance()?; - - Ok(idx) - } -} - -/// Encode a Format A instruction -pub(super) fn encode_a(op: OpCode, a: u8, b: u8, c: u8) -> u32 { - ((op as u32) << 24) | ((a as u32) << 16) | ((b as u32) << 8) | (c as u32) -} - -/// Encode a Format B instruction -pub(super) fn encode_b(op: OpCode, a: u8, imm: i16) -> u32 { - ((op as u32) << 24) | ((a as u32) << 16) | ((imm as u16) as u32) -} diff --git a/bytecode/src/bytecode/buffer.rs b/bytecode/src/bytecode/buffer.rs deleted file mode 100644 index b3e16a2..0000000 --- a/bytecode/src/bytecode/buffer.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::cell::UnsafeCell; -use std::sync::Arc; - -// Bytecode buffer with interior mutability for inline cache patching. 
-// Arc for shared ownership, UnsafeCell for patching without &mut. -// SAFETY: VM is single-threaded, patching happens during execution only. -// Would need AtomicU32 if we ever go multi-threaded -#[derive(Clone)] -pub struct BytecodeBuffer(Arc>>); - -impl std::fmt::Debug for BytecodeBuffer { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("BytecodeBuffer") - .field(&format!("[{} words]", self.len())) - .finish() - } -} - -impl BytecodeBuffer { - #[allow(clippy::arc_with_non_send_sync)] // Intentional: VM is single-threaded, see module comment - pub fn new(data: Box<[u32]>) -> Self { - Self(Arc::new(UnsafeCell::new(data))) - } - pub fn empty() -> Self { - Self::new(Box::new([])) - } - pub fn from_vec(v: Vec) -> Self { - Self::new(v.into_boxed_slice()) - } - - #[inline(always)] - pub fn len(&self) -> usize { - unsafe { (&*self.0.get()).len() } - } - - #[inline(always)] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - // raw pointers for dispatch loop - valid as long as buffer lives - #[inline(always)] - pub fn as_ptr(&self) -> *const u32 { - unsafe { (&*self.0.get()).as_ptr() } - } - - #[inline(always)] - pub fn as_mut_ptr(&self) -> *mut u32 { - unsafe { (&mut *self.0.get()).as_mut_ptr() } - } - - #[inline(always)] - pub fn read(&self, off: usize) -> u32 { - unsafe { *(&*self.0.get()).get_unchecked(off) } - } - - // for inline cache patching - #[inline(always)] - pub fn patch(&self, off: usize, val: u32) { - unsafe { - *(&mut *self.0.get()).get_unchecked_mut(off) = val; - } - } - - pub fn as_slice(&self) -> &[u32] { - unsafe { &*self.0.get() } - } - - pub fn iter(&self) -> impl Iterator { - self.as_slice().iter() - } -} - -impl std::ops::Index for BytecodeBuffer { - type Output = u32; - - fn index(&self, index: usize) -> &Self::Output { - &self.as_slice()[index] - } -} - -impl PartialEq for BytecodeBuffer { - fn eq(&self, other: &Self) -> bool { - self.as_slice() == other.as_slice() - } -} - -impl From> for 
BytecodeBuffer { - fn from(v: Vec) -> Self { - Self::from_vec(v) - } -} - -impl From> for BytecodeBuffer { - fn from(arc: Arc<[u32]>) -> Self { - Self::new(arc.to_vec().into_boxed_slice()) - } -} - -// NOTE: intentionally NOT Send/Sync due to UnsafeCell patching. -// Use AtomicU32 if multi-threading is ever needed. diff --git a/bytecode/src/bytecode/decode.rs b/bytecode/src/bytecode/decode.rs deleted file mode 100644 index c5ad62b..0000000 --- a/bytecode/src/bytecode/decode.rs +++ /dev/null @@ -1,25 +0,0 @@ -use super::opcode::OpCode; - -// instruction formats: -// A: op(8) | a(8) | b(8) | c(8) - 3 regs -// B: op(8) | a(8) | imm(16) - reg + signed immediate -// C: same as A - just different semantics (call) - -pub fn decode_a(instr: u32) -> (OpCode, u8, u8, u8) { - let op = OpCode::from_u8((instr >> 24) as u8).unwrap_or(OpCode::Move); - ( - op, - ((instr >> 16) & 0xFF) as u8, - ((instr >> 8) & 0xFF) as u8, - (instr & 0xFF) as u8, - ) -} - -pub fn decode_b(instr: u32) -> (OpCode, u8, i16) { - let op = OpCode::from_u8((instr >> 24) as u8).unwrap_or(OpCode::Move); - (op, ((instr >> 16) & 0xFF) as u8, (instr & 0xFFFF) as i16) -} - -pub fn decode_c(instr: u32) -> (OpCode, u8, u8, u8) { - decode_a(instr) -} diff --git a/bytecode/src/bytecode/function/constants.rs b/bytecode/src/bytecode/function/constants.rs deleted file mode 100644 index 9d855ec..0000000 --- a/bytecode/src/bytecode/function/constants.rs +++ /dev/null @@ -1,60 +0,0 @@ -use super::Function; -use crate::value::Value; -use std::collections::HashMap; - -impl Function { - /// Compute and set the global_layout_hash from global layout names - pub fn compute_global_layout_hash(&mut self) { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - if self.global_layout.names().is_empty() { - self.global_layout_hash = 0; - } else { - let mut hasher = DefaultHasher::new(); - self.global_layout.names().hash(&mut hasher); - self.global_layout_hash = hasher.finish() | 1; - } - } - - /// Add 
a constant and return its index - pub fn add_constant(&mut self, value: Value) -> u16 { - for (i, existing) in self.constants.iter().enumerate() { - if *existing == value { - return i as u16; - } - } - - let idx = self.constants.len() as u16; - self.constants.push(value); - idx - } - - /// Add a nested function and return a special constant index for it - pub fn add_constant_function(&mut self, func: Function) -> u16 { - let func_idx = self.nested_functions.len(); - self.nested_functions.push(func); - - // Use dedicated tag to avoid collision with heap pointers - let marker = Value::nested_fn_marker(func_idx); - - let idx = self.constants.len() as u16; - self.constants.push(marker); - idx - } - - /// Remap GcRef pointers in constants using the provided mapping. - pub fn remap_constants(&mut self, remap: &HashMap) { - for constant in &mut self.constants { - if let Some(old_idx) = constant.as_ptr() - && let Some(&new_idx) = remap.get(&old_idx) - { - *constant = Value::ptr(new_idx); - } - } - - for nested_func in &mut self.nested_functions { - nested_func.remap_constants(remap); - } - } -} diff --git a/bytecode/src/bytecode/function/lines.rs b/bytecode/src/bytecode/function/lines.rs deleted file mode 100644 index 57d87c9..0000000 --- a/bytecode/src/bytecode/function/lines.rs +++ /dev/null @@ -1,33 +0,0 @@ -use super::Function; - -impl Function { - pub(super) fn add_line(&mut self, line: u32) { - if let Some((count, last_line)) = self.lines.last_mut() - && *last_line == line - && *count < u16::MAX - { - *count += 1; - return; - } - self.lines.push((1, line)); - } - - /// Record line info for multiple words (used for cache words after CallGlobal) - pub fn record_lines(&mut self, count: usize, line: u32) { - for _ in 0..count { - self.add_line(line); - } - } - - /// Get line number for an instruction index - pub fn get_line(&self, idx: usize) -> u32 { - let mut offset = 0usize; - for &(count, line) in &self.lines { - offset += count as usize; - if idx < offset { - return 
line; - } - } - 0 - } -} diff --git a/bytecode/src/bytecode/function/mod.rs b/bytecode/src/bytecode/function/mod.rs deleted file mode 100644 index fa56038..0000000 --- a/bytecode/src/bytecode/function/mod.rs +++ /dev/null @@ -1,112 +0,0 @@ -use super::buffer::BytecodeBuffer; -use super::global_layout::GlobalLayout; -use super::opcode::OpCode; -use super::upvalue::UpvalueDescriptor; -use crate::value::Value; -use std::sync::Arc; - -mod constants; -mod lines; -mod patch; -mod registers; -mod storage; - -// A compiled function. BytecodeBuffer allows patching for inline caches -// while keeping raw pointers stable (important for dispatch loop perf). -#[derive(Debug, Clone)] -pub struct Function { - pub name: Option, - pub arity: u8, - pub num_registers: u8, - pub call_site_count: u16, // for MIC pre-allocation - pub bytecode: BytecodeBuffer, - bytecode_builder: Vec, // temp storage during compilation - pub constants: Vec, - pub nested_functions: Vec, - pub upvalue_descriptors: Vec, - pub lines: Vec<(u16, u32)>, - pub global_layout: Arc, - pub global_layout_hash: u64, -} - -impl Function { - pub fn new(name: Option, arity: u8) -> Self { - Self { - name, - arity, - num_registers: 0, - call_site_count: 0, - bytecode: BytecodeBuffer::empty(), - bytecode_builder: Vec::new(), - constants: Vec::new(), - nested_functions: Vec::new(), - upvalue_descriptors: Vec::new(), - lines: Vec::new(), - global_layout: GlobalLayout::empty(), - global_layout_hash: 0, - } - } - - pub fn finalize_bytecode(&mut self) { - if !self.bytecode_builder.is_empty() { - self.bytecode = BytecodeBuffer::from_vec(std::mem::take(&mut self.bytecode_builder)); - } - // make sure we have enough registers for the bytecode - let needed = registers::required_registers(self.bytecode.as_slice()); - if needed > self.num_registers as usize { - self.num_registers = needed.min(255) as u8; - } - for f in &mut self.nested_functions { - f.finalize_bytecode(); - } - } - - // format A: op|a|b|c (3 regs) - pub fn emit_a(&mut 
self, op: OpCode, a: u8, b: u8, c: u8, line: u32) { - self.emit_raw( - ((op as u32) << 24) | ((a as u32) << 16) | ((b as u32) << 8) | c as u32, - line, - ); - } - - // format B: op|a|imm16 - pub fn emit_b(&mut self, op: OpCode, a: u8, imm: i16, line: u32) { - self.emit_raw( - ((op as u32) << 24) | ((a as u32) << 16) | (imm as u16) as u32, - line, - ); - } - - // format C: same layout as A but semantics are dest|func|nargs - pub fn emit_c(&mut self, op: OpCode, dest: u8, func: u8, nargs: u8, line: u32) { - self.emit_a(op, dest, func, nargs, line); - } - - fn emit_raw(&mut self, instr: u32, line: u32) { - self.bytecode_builder.push(instr); - self.add_line(line); - } - - /// Strip debug information for release builds. - pub fn strip_debug_info(&mut self) { - self.name = None; - self.lines.clear(); - let names = self.global_layout.names(); - if !names.is_empty() { - let stripped: Vec = names - .iter() - .map(|name| { - if name.contains("::") { - name.clone() - } else { - String::new() - } - }) - .collect(); - self.global_layout = GlobalLayout::new(stripped); - } - for nested in &mut self.nested_functions { - nested.strip_debug_info(); - } - } -} diff --git a/bytecode/src/bytecode/function/patch.rs b/bytecode/src/bytecode/function/patch.rs deleted file mode 100644 index ca2d2e4..0000000 --- a/bytecode/src/bytecode/function/patch.rs +++ /dev/null @@ -1,12 +0,0 @@ -use super::Function; - -impl Function { - /// Patch a jump instruction at the given offset - pub fn patch_jump(&mut self, offset: usize) { - let jump_dist = (self.bytecode_builder.len() - offset - 1) as i16; - let instr = self.bytecode_builder[offset]; - let op = instr >> 24; - let a = (instr >> 16) & 0xFF; - self.bytecode_builder[offset] = (op << 24) | (a << 16) | ((jump_dist as u16) as u32); - } -} diff --git a/bytecode/src/bytecode/function/registers.rs b/bytecode/src/bytecode/function/registers.rs deleted file mode 100644 index 7eed1e1..0000000 --- a/bytecode/src/bytecode/function/registers.rs +++ /dev/null 
@@ -1,359 +0,0 @@ -use crate::bytecode::OpCode; -use crate::bytecode::decode_a; - -pub(super) fn required_registers(bytecode: &[u32]) -> usize { - let mut max_reg: usize = 0; - let mut used = false; - let mut ip = 0; - - while ip < bytecode.len() { - let instr = bytecode[ip]; - let (op, a, b, c) = decode_a(instr); - let imm = (instr & 0xFFFF) as i16; - - match op { - OpCode::Move => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None) - } - OpCode::LoadI | OpCode::LoadNull | OpCode::LoadBool | OpCode::LoadK => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::Add - | OpCode::Sub - | OpCode::Mul - | OpCode::Div - | OpCode::Mod - | OpCode::Eq - | OpCode::Ne - | OpCode::Lt - | OpCode::Le - | OpCode::Gt - | OpCode::Ge - | OpCode::AddII - | OpCode::SubII - | OpCode::MulII - | OpCode::DivII - | OpCode::ModII - | OpCode::AddFF - | OpCode::SubFF - | OpCode::MulFF - | OpCode::DivFF - | OpCode::ModFF - | OpCode::LtII - | OpCode::LeII - | OpCode::GtII - | OpCode::GeII - | OpCode::EqII - | OpCode::NeII - | OpCode::LtFF - | OpCode::LeFF - | OpCode::GtFF - | OpCode::GeFF - | OpCode::EqFF - | OpCode::NeFF - | OpCode::AddIIG - | OpCode::SubIIG - | OpCode::MulIIG - | OpCode::DivIIG - | OpCode::ModIIG - | OpCode::AddFFG - | OpCode::SubFFG - | OpCode::MulFFG - | OpCode::DivFFG - | OpCode::ModFFG - | OpCode::LtIIG - | OpCode::LeIIG - | OpCode::GtIIG - | OpCode::GeIIG - | OpCode::EqIIG - | OpCode::NeIIG - | OpCode::LtFFG - | OpCode::LeFFG - | OpCode::GtFFG - | OpCode::GeFFG - | OpCode::EqFFG - | OpCode::NeFFG - | OpCode::Shl - | OpCode::Shr - | OpCode::BitAnd - | OpCode::BitOr - | OpCode::BitXor - | OpCode::ShlII - | OpCode::ShrII - | OpCode::AndII - | OpCode::OrII - | OpCode::XorII => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - OpCode::ShlIImm - | OpCode::ShrIImm - | OpCode::AndIImm - | OpCode::OrIImm - | OpCode::XorIImm => { - update_max_reg(&mut max_reg, 
&mut used, a as usize, Some(b as usize), None); - } - OpCode::Neg | OpCode::Not | OpCode::BitNot | OpCode::NotI => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::Jump => { - let _ = imm; - } - OpCode::JumpIf | OpCode::JumpIfNot => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::Call => { - let nargs = c as usize; - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - if nargs > 0 { - update_max_reg(&mut max_reg, &mut used, (b as usize) + nargs, None, None); - } - } - OpCode::Return => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::Return0 => {} - OpCode::GetGlobal | OpCode::SetGlobal | OpCode::IncGlobalI => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::EnterNoGc | OpCode::ExitNoGc => {} - OpCode::Alloc => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::Free => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::LoadMem => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - OpCode::LoadMemI => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::StoreMem => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - OpCode::StoreMemI => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(c as usize), None); - } - OpCode::Print => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::MakeClosure | OpCode::GetUpval | OpCode::CloseUpvals => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::SetUpval => { - update_max_reg(&mut max_reg, &mut used, b as usize, None, None); - } - OpCode::ForLoopI | OpCode::ForLoopIInc => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some((a 
as usize) + 1), - Some((a as usize) + 2), - ); - } - OpCode::AddI | OpCode::SubI => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::LtImm | OpCode::LeImm | OpCode::GtImm | OpCode::GeImm => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::WhileLoopLt => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some((a as usize) + 1), - None, - ); - } - OpCode::LtIImm | OpCode::LeIImm | OpCode::GtIImm | OpCode::GeIImm => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - OpCode::GetGlobalIdx | OpCode::SetGlobalIdx => { - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - } - OpCode::CallGlobal | OpCode::CallGlobalMono | OpCode::CallGlobalNative => { - let nargs = c as usize; - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - if nargs > 0 { - update_max_reg(&mut max_reg, &mut used, (a as usize) + nargs, None, None); - } - ip += 2; // skip cache words - } - OpCode::CallCached => { - let nargs = c as usize; - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - if nargs > 0 { - update_max_reg(&mut max_reg, &mut used, (b as usize) + nargs, None, None); - } - } - OpCode::CallUpval | OpCode::TailCallUpval => { - let nargs = c as usize; - update_max_reg(&mut max_reg, &mut used, a as usize, None, None); - if nargs > 0 { - update_max_reg(&mut max_reg, &mut used, (a as usize) + nargs, None, None); - } - } - - // Array operations - dest, count - OpCode::ArrayNewI | OpCode::ArrayNewF | OpCode::ArrayNewB | OpCode::ArrayNewP => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - // Array literal - dest, start, count (uses regs start..start+count) - OpCode::ArrayLit | OpCode::VecLit => { - let count = c as usize; - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - if count > 0 { - update_max_reg( - &mut max_reg, - &mut used, - 
(b as usize) + count - 1, - None, - None, - ); - } - } - // Array load/get/store - all use 3 registers - OpCode::ArrayLoadI - | OpCode::ArrayLoadF - | OpCode::ArrayLoadB - | OpCode::ArrayLoadP - | OpCode::ArrayGetI - | OpCode::ArrayGetF - | OpCode::ArrayGetB - | OpCode::ArrayGetP - | OpCode::ArrayStoreI - | OpCode::ArrayStoreF - | OpCode::ArrayStoreB - | OpCode::ArrayStoreP => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - // Array/Vec length - dest, arr - OpCode::ArrayLen | OpCode::VecLen | OpCode::VecCap => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - - // Vec operations - dest, cap - OpCode::VecNewI | OpCode::VecNewF | OpCode::VecNewB | OpCode::VecNewP => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - // Vec push - vec, val - OpCode::VecPushI | OpCode::VecPushF | OpCode::VecPushB | OpCode::VecPushP => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - // Vec pop - dest, vec - OpCode::VecPopI | OpCode::VecPopF | OpCode::VecPopB | OpCode::VecPopP => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - // Vec reserve - vec, cap - OpCode::VecReserve => { - update_max_reg(&mut max_reg, &mut used, a as usize, Some(b as usize), None); - } - // Vec load - dest, vec, idx (3 registers) - OpCode::VecLoadI | OpCode::VecLoadF | OpCode::VecLoadB | OpCode::VecLoadP => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - // Vec get (safe) - dest, vec, idx (3 registers) - OpCode::VecGetI | OpCode::VecGetF | OpCode::VecGetB | OpCode::VecGetP => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - // Vec store - vec, idx, val (3 registers) - OpCode::VecStoreI | OpCode::VecStoreF | OpCode::VecStoreB | OpCode::VecStoreP => { - update_max_reg( - 
&mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - // String load char - dest, string, index (3 registers) - OpCode::StringLoadChar => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize, - Some(b as usize), - Some(c as usize), - ); - } - // String for loop - uses consecutive regs [char_result(a), byte_offset(a+1), string_ptr(a+2)] - OpCode::StringForLoop => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize + 2, - Some(a as usize + 1), - Some(a as usize), - ); - } - // Vec/Array for loop - uses consecutive regs [element(a), index(a+1), collection_ptr(a+2)] - OpCode::VecForLoop | OpCode::ArrayForLoop => { - update_max_reg( - &mut max_reg, - &mut used, - a as usize + 2, - Some(a as usize + 1), - Some(a as usize), - ); - } - } - ip += 1; - } - - if used { max_reg + 1 } else { 0 } -} - -fn update_max_reg( - max_reg: &mut usize, - used: &mut bool, - a: usize, - b: Option, - c: Option, -) { - *used = true; - *max_reg = (*max_reg).max(a); - if let Some(b) = b { - *max_reg = (*max_reg).max(b); - } - if let Some(c) = c { - *max_reg = (*max_reg).max(c); - } -} diff --git a/bytecode/src/bytecode/function/storage.rs b/bytecode/src/bytecode/function/storage.rs deleted file mode 100644 index 3e4ba11..0000000 --- a/bytecode/src/bytecode/function/storage.rs +++ /dev/null @@ -1,44 +0,0 @@ -use super::Function; -use crate::bytecode::{BytecodeBuffer, OpCode}; - -impl Function { - /// Set bytecode directly from a Vec (for assembler/binary loading). - pub fn set_bytecode(&mut self, bytecode: Vec) { - self.bytecode = BytecodeBuffer::from_vec(bytecode); - self.bytecode_builder.clear(); - } - - /// Push a raw instruction to the bytecode builder. - pub fn push_raw(&mut self, instr: u32) { - self.bytecode_builder.push(instr); - } - - /// Get a mutable reference to a bytecode instruction at the given index. 
- pub fn bytecode_mut(&mut self, index: usize) -> &mut u32 { - &mut self.bytecode_builder[index] - } - - /// Get an immutable reference to a bytecode instruction at the given index. - pub fn bytecode_at(&self, index: usize) -> u32 { - self.bytecode_builder[index] - } - - /// Get current instruction count (for jump patching) - pub fn current_offset(&self) -> usize { - self.bytecode_builder.len() - } - - /// Emit a jump and return its offset for later patching - pub fn emit_jump(&mut self, op: OpCode, line: u32) -> usize { - let offset = self.current_offset(); - self.emit_b(op, 0, 0, line); - offset - } - - /// Emit a conditional jump and return its offset - pub fn emit_jump_if(&mut self, op: OpCode, reg: u8, line: u32) -> usize { - let offset = self.current_offset(); - self.emit_b(op, reg, 0, line); - offset - } -} diff --git a/bytecode/src/bytecode/global_layout.rs b/bytecode/src/bytecode/global_layout.rs deleted file mode 100644 index e2f3169..0000000 --- a/bytecode/src/bytecode/global_layout.rs +++ /dev/null @@ -1,63 +0,0 @@ -use std::collections::HashMap; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex, OnceLock}; - -#[derive(Debug)] -pub struct GlobalLayout { - id: usize, - names: Vec, -} - -static GLOBAL_LAYOUT_ID: AtomicUsize = AtomicUsize::new(1); -static GLOBAL_LAYOUT_CACHE: OnceLock, Arc>>> = - OnceLock::new(); -static EMPTY_LAYOUT: OnceLock> = OnceLock::new(); - -impl GlobalLayout { - pub fn new(names: Vec) -> Arc { - if names.is_empty() { - return Self::empty(); - } - let cache = GLOBAL_LAYOUT_CACHE.get_or_init(|| Mutex::new(HashMap::new())); - let mut guard = cache.lock().unwrap_or_else(|e| e.into_inner()); - if let Some(existing) = guard.get(&names) { - return Arc::clone(existing); - } - let id = GLOBAL_LAYOUT_ID.fetch_add(1, Ordering::Relaxed); - let layout = Arc::new(Self { - id, - names: names.clone(), - }); - guard.insert(names, Arc::clone(&layout)); - layout - } - - pub fn empty() -> Arc { - EMPTY_LAYOUT - 
.get_or_init(|| { - Arc::new(Self { - id: 0, - names: Vec::new(), - }) - }) - .clone() - } - - // create a layout with placeholder names (for release builds that strip debug info) - pub fn new_anonymous(count: usize) -> Arc { - if count == 0 { - return Self::empty(); - } - let names = vec![String::new(); count]; - let id = GLOBAL_LAYOUT_ID.fetch_add(1, Ordering::Relaxed); - Arc::new(Self { id, names }) - } - - pub fn id(&self) -> usize { - self.id - } - - pub fn names(&self) -> &[String] { - &self.names - } -} diff --git a/bytecode/src/bytecode/mod.rs b/bytecode/src/bytecode/mod.rs deleted file mode 100644 index 315c90a..0000000 --- a/bytecode/src/bytecode/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -// bytecode format and instruction encoding - -mod buffer; -mod decode; -mod function; -mod global_layout; -mod opcode; -mod upvalue; - -pub use buffer::BytecodeBuffer; -pub use decode::{decode_a, decode_b, decode_c}; -pub use function::Function; -pub use global_layout::GlobalLayout; -pub use opcode::OpCode; -pub use upvalue::UpvalueDescriptor; diff --git a/bytecode/src/bytecode/opcode.rs b/bytecode/src/bytecode/opcode.rs deleted file mode 100644 index 9223f1b..0000000 --- a/bytecode/src/bytecode/opcode.rs +++ /dev/null @@ -1,189 +0,0 @@ -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -pub enum OpCode { - Move = 0, - LoadI, - LoadK, - LoadNull, - LoadBool, - Add, - Sub, - Mul, - Div, - Mod, - Neg, - Eq, - Ne, - Lt, - Le, - Gt, - Ge, - Not, - Jump, - JumpIf, - JumpIfNot, - Call, - Return, - Return0, - GetGlobal, - SetGlobal, - EnterNoGc, - ExitNoGc, - Alloc, - Free, - LoadMem, - LoadMemI, - StoreMem, - StoreMemI, - Print, - MakeClosure, - GetUpval, - SetUpval, - CloseUpvals, - IncGlobalI, - ForLoopI, - ForLoopIInc, - AddI, - SubI, - LtImm, - LeImm, - GtImm, - GeImm, - WhileLoopLt, - AddII, - SubII, - MulII, - DivII, - ModII, - AddFF, - SubFF, - MulFF, - DivFF, - ModFF, - LtII, - LeII, - GtII, - GeII, - EqII, - NeII, - LtFF, - LeFF, - GtFF, - GeFF, - EqFF, - NeFF, - 
LtIImm, - LeIImm, - GtIImm, - GeIImm, - GetGlobalIdx, - SetGlobalIdx, - CallGlobal, - CallGlobalMono, - CallCached, - CallUpval, - TailCallUpval, - AddIIG, - SubIIG, - MulIIG, - DivIIG, - ModIIG, - AddFFG, - SubFFG, - MulFFG, - DivFFG, - ModFFG, - LtIIG, - LeIIG, - GtIIG, - GeIIG, - EqIIG, - NeIIG, - LtFFG, - LeFFG, - GtFFG, - GeFFG, - EqFFG, - NeFFG, - CallGlobalNative, - Shl, - Shr, - BitAnd, - BitOr, - BitXor, - BitNot, - ShlII, - ShrII, - AndII, - OrII, - XorII, - NotI, - ShlIImm, - ShrIImm, - AndIImm, - OrIImm, - XorIImm, - - ArrayNewI = 130, - ArrayNewF, - ArrayNewB, - ArrayNewP, - ArrayLit, - ArrayLoadI, - ArrayLoadF, - ArrayLoadB, - ArrayLoadP, - ArrayGetI, - ArrayGetF, - ArrayGetB, - ArrayGetP, - ArrayStoreI, - ArrayStoreF, - ArrayStoreB, - ArrayStoreP, - ArrayLen, - - VecNewI, - VecNewF, - VecNewB, - VecNewP, - VecLit, - VecPushI, - VecPushF, - VecPushB, - VecPushP, - VecPopI, - VecPopF, - VecPopB, - VecPopP, - VecLen, - VecCap, - VecReserve, - VecLoadI, - VecLoadF, - VecLoadB, - VecLoadP, - VecGetI, - VecGetF, - VecGetB, - VecGetP, - VecStoreI, - VecStoreF, - VecStoreB, - VecStoreP, - - StringLoadChar = 176, - StringForLoop, - VecForLoop, - ArrayForLoop, -} - -impl OpCode { - pub fn from_u8(byte: u8) -> Option { - if byte <= Self::ArrayForLoop as u8 { - Some(unsafe { std::mem::transmute::(byte) }) - } else { - None - } - } -} diff --git a/bytecode/src/bytecode/upvalue.rs b/bytecode/src/bytecode/upvalue.rs deleted file mode 100644 index ccc93ff..0000000 --- a/bytecode/src/bytecode/upvalue.rs +++ /dev/null @@ -1,9 +0,0 @@ -/// Describes how to capture an upvalue for a closure. -#[derive(Debug, Clone, Copy)] -pub struct UpvalueDescriptor { - /// If true, capture from enclosing function's locals (register). - /// If false, capture from enclosing function's upvalues. - pub is_local: bool, - /// The index: register number if is_local, upvalue index otherwise. 
- pub index: u8, -} diff --git a/bytecode/src/heap/access.rs b/bytecode/src/heap/access.rs deleted file mode 100644 index 0e7a8ee..0000000 --- a/bytecode/src/heap/access.rs +++ /dev/null @@ -1,61 +0,0 @@ -use super::Heap; -use crate::object::{GcObject, GcRef, ObjectKind}; - -impl Heap { - pub fn get(&self, gc_ref: GcRef) -> Option<&GcObject> { - self.objects.get(gc_ref.index())?.as_ref() - } - - /// Get a GC object without bounds checking. - /// - /// # Safety - /// - The caller must guarantee that `gc_ref.index()` is within bounds of `self.objects`. - /// - The slot at `gc_ref.index()` must contain a valid object (not None). - // TODO: Future optimization for faster heap access in hot paths. - #[allow(dead_code)] - #[inline(always)] - pub unsafe fn get_unchecked(&self, gc_ref: GcRef) -> &GcObject { - unsafe { - self.objects - .get_unchecked(gc_ref.index()) - .as_ref() - .unwrap_unchecked() - } - } - - pub fn get_mut(&mut self, gc_ref: GcRef) -> Option<&mut GcObject> { - self.objects.get_mut(gc_ref.index())?.as_mut() - } - - pub fn get_type_name(&self, gc_ref: GcRef) -> &'static str { - if let Some(obj) = self.get(gc_ref) { - match &obj.kind { - ObjectKind::String(_) => "String", - ObjectKind::Function(_) => "Function", - ObjectKind::Native(_) => "NativeFunction", - ObjectKind::Upvalue(_) => "Upvalue", - ObjectKind::Closure(_) => "Closure", - ObjectKind::Array(_) => "Array", - ObjectKind::Vec(_) => "Vec", - } - } else { - "Unknown" - } - } - - pub fn should_collect(&self) -> bool { - self.bytes_allocated >= self.next_gc - } - - pub fn bytes_allocated(&self) -> usize { - self.bytes_allocated - } - - pub fn next_gc_threshold(&self) -> usize { - self.next_gc - } - - pub fn object_count(&self) -> usize { - self.objects.iter().filter(|o| o.is_some()).count() - } -} diff --git a/bytecode/src/heap/alloc.rs b/bytecode/src/heap/alloc.rs deleted file mode 100644 index df157e5..0000000 --- a/bytecode/src/heap/alloc.rs +++ /dev/null @@ -1,40 +0,0 @@ -use super::Heap; -use 
crate::Function; -use crate::object::{AelysFunction, AelysString, GcObject, GcRef, NativeFunction, ObjectKind}; - -impl Heap { - pub fn alloc(&mut self, obj: GcObject) -> GcRef { - self.bytes_allocated += Self::estimate_object_size(&obj); - - // reuse free slots when possible - if let Some(idx) = self.free_list.pop() { - self.objects[idx] = Some(obj); - GcRef::new(idx) - } else { - let idx = self.objects.len(); - self.objects.push(Some(obj)); - GcRef::new(idx) - } - } - - pub fn alloc_string(&mut self, s: &str) -> GcRef { - self.alloc(GcObject::new(ObjectKind::String(AelysString::new(s)))) - } - - pub fn alloc_function(&mut self, func: Function) -> GcRef { - self.alloc(GcObject::new(ObjectKind::Function(AelysFunction::new( - func, - )))) - } - - pub fn alloc_native(&mut self, name: &str, arity: u8) -> GcRef { - self.alloc(GcObject::new(ObjectKind::Native(NativeFunction::new( - name, arity, - )))) - } - - // same as alloc_native, just different name for clarity in calling code - pub fn alloc_foreign(&mut self, name: &str, arity: u8) -> GcRef { - self.alloc_native(name, arity) - } -} diff --git a/bytecode/src/heap/gc.rs b/bytecode/src/heap/gc.rs deleted file mode 100644 index e22e9c4..0000000 --- a/bytecode/src/heap/gc.rs +++ /dev/null @@ -1,109 +0,0 @@ -use super::Heap; -use crate::object::{AelysClosure, AelysString, AelysUpvalue, GcRef, ObjectKind}; -use crate::value::Value; - -impl Heap { - // mark-sweep GC, nothing fancy. 
worklist avoids recursion (stack overflow on deep graphs) - pub fn mark(&mut self, root: GcRef) { - let mut worklist = vec![root]; - - while let Some(r) = worklist.pop() { - let should_trace = self - .get_mut(r) - .map(|o| { - if o.marked { - false - } else { - o.marked = true; - true - } - }) - .unwrap_or(false); - - if should_trace && let Some(obj) = self.get(r) { - match &obj.kind { - ObjectKind::Function(f) => { - for v in &f.function.constants { - if let Some(p) = v.as_ptr() { - worklist.push(GcRef::new(p)); - } - } - } - ObjectKind::Closure(c) => { - worklist.push(c.function); - worklist.extend(c.upvalues.iter().cloned()); - } - ObjectKind::Upvalue(u) => { - if let crate::object::UpvalueLocation::Closed(v) = &u.location - && let Some(p) = v.as_ptr() - { - worklist.push(GcRef::new(p)); - } - } - ObjectKind::String(_) | ObjectKind::Native(_) => {} - ObjectKind::Array(a) => { - if let Some(objs) = a.data.as_objects() { - for v in objs { - if let Some(p) = v.as_ptr() { - worklist.push(GcRef::new(p)); - } - } - } - } - ObjectKind::Vec(vec) => { - if let Some(objs) = vec.objects() { - for v in objs { - if let Some(p) = v.as_ptr() { - worklist.push(GcRef::new(p)); - } - } - } - } - } - } - } - } - - pub fn sweep(&mut self) -> usize { - let mut freed = 0; - - for (idx, slot) in self.objects.iter_mut().enumerate() { - let should_free = slot.as_ref().map(|o| !o.marked).unwrap_or(false); - if let Some(obj) = slot.as_mut() { - obj.marked = false; - } - - if should_free && let Some(obj) = slot.take() { - self.bytes_allocated = self - .bytes_allocated - .saturating_sub(Self::estimate_object_size(&obj)); - if let ObjectKind::String(s) = &obj.kind { - self.intern_table.remove(&s.hash()); - } - self.free_list.push(idx); - freed += 1; - } - } - - // grow threshold after collection - self.next_gc = - (self.bytes_allocated * Self::GC_GROWTH_FACTOR).max(Self::INITIAL_GC_THRESHOLD); - freed - } - - pub fn estimate_object_size(obj: &crate::object::GcObject) -> usize { - match 
&obj.kind { - ObjectKind::String(s) => std::mem::size_of::() + s.len(), - ObjectKind::Function(f) => { - std::mem::size_of::() - + f.function.bytecode.len() * 4 - + f.function.constants.len() * std::mem::size_of::() - } - ObjectKind::Native(_) => std::mem::size_of::(), - ObjectKind::Upvalue(_) => std::mem::size_of::(), - ObjectKind::Closure(c) => std::mem::size_of::() + c.upvalues.len() * 8, - ObjectKind::Array(a) => a.size_bytes(), - ObjectKind::Vec(v) => v.size_bytes(), - } - } -} diff --git a/bytecode/src/heap/merge.rs b/bytecode/src/heap/merge.rs deleted file mode 100644 index 795c53f..0000000 --- a/bytecode/src/heap/merge.rs +++ /dev/null @@ -1,28 +0,0 @@ -use super::Heap; -use crate::object::GcRef; - -impl Heap { - /// Merge another heap into this heap. - pub fn merge(&mut self, other: &mut Heap) -> std::collections::HashMap { - let mut remap = std::collections::HashMap::new(); - - for (old_idx, slot) in other.objects.iter_mut().enumerate() { - if let Some(obj) = slot.take() { - let new_ref = self.alloc(obj); - remap.insert(old_idx, new_ref.index()); - } - } - - for (hash, old_ref) in other.intern_table.drain() { - if let Some(&new_idx) = remap.get(&old_ref.index()) { - self.intern_table.entry(hash).or_insert(GcRef::new(new_idx)); - } - } - - other.objects.clear(); - other.free_list.clear(); - other.bytes_allocated = 0; - - remap - } -} diff --git a/bytecode/src/heap/mod.rs b/bytecode/src/heap/mod.rs deleted file mode 100644 index 339abe4..0000000 --- a/bytecode/src/heap/mod.rs +++ /dev/null @@ -1,50 +0,0 @@ -// gc heap for bytecode constants and runtime objects - -mod access; -mod alloc; -mod gc; -mod merge; -mod strings; - -use crate::object::{GcObject, GcRef}; -use std::collections::HashMap; - -pub struct Heap { - objects: Vec>, - free_list: Vec, - bytes_allocated: usize, - next_gc: usize, - intern_table: HashMap, // string interning -} - -impl Heap { - pub const INITIAL_GC_THRESHOLD: usize = 1024 * 1024; // 1MB - const GC_GROWTH_FACTOR: usize = 2; - - pub 
fn new() -> Self { - Self { - objects: Vec::new(), - free_list: Vec::new(), - bytes_allocated: 0, - next_gc: Self::INITIAL_GC_THRESHOLD, - intern_table: HashMap::new(), - } - } - - pub fn estimate_string_size(len: usize) -> usize { - std::mem::size_of::() + len - } -} - -impl Default for Heap { - fn default() -> Self { - Self::new() - } -} - -// clone gives you a fresh heap, not a copy (objects aren't clonable) -impl Clone for Heap { - fn clone(&self) -> Self { - Self::new() - } -} diff --git a/bytecode/src/heap/strings.rs b/bytecode/src/heap/strings.rs deleted file mode 100644 index c666258..0000000 --- a/bytecode/src/heap/strings.rs +++ /dev/null @@ -1,40 +0,0 @@ -use super::Heap; -use crate::object::{AelysString, GcObject, GcRef, ObjectKind}; - -impl Heap { - // lookup only, doesn't insert - pub fn find_interned_string(&self, s: &str) -> Option { - let r = *self.intern_table.get(&Self::fnv1a_hash(s.as_bytes()))?; - match self.get(r)?.kind { - ObjectKind::String(ref existing) if existing.as_str() == s => Some(r), - _ => None, - } - } - - // intern or return existing - pub fn intern_string(&mut self, s: &str) -> GcRef { - let hash = Self::fnv1a_hash(s.as_bytes()); - - // check if already interned - if let Some(&r) = self.intern_table.get(&hash) - && let Some(obj) = self.get(r) - && let ObjectKind::String(ref existing) = obj.kind - && existing.as_str() == s - { - return r; - } - - let r = self.alloc(GcObject::new(ObjectKind::String(AelysString::new(s)))); - self.intern_table.insert(hash, r); - r - } - - // FNV-1a - simple and fast enough for string interning - pub fn fnv1a_hash(bytes: &[u8]) -> u64 { - let mut h = 0xcbf29ce484222325u64; - for &b in bytes { - h = (h ^ b as u64).wrapping_mul(0x100000001b3); - } - h - } -} diff --git a/bytecode/src/lib.rs b/bytecode/src/lib.rs deleted file mode 100644 index 8a63f3e..0000000 --- a/bytecode/src/lib.rs +++ /dev/null @@ -1,13 +0,0 @@ -// bytecode format and asm/disasm - -pub mod asm; -pub mod bytecode; -pub mod heap; -pub 
mod object; -pub mod value; - -pub use asm::*; -pub use bytecode::*; -pub use heap::*; -pub use object::*; -pub use value::{IntegerOverflowError, Value}; diff --git a/bytecode/src/object/array.rs b/bytecode/src/object/array.rs deleted file mode 100644 index 50f77ee..0000000 --- a/bytecode/src/object/array.rs +++ /dev/null @@ -1,226 +0,0 @@ -use crate::value::Value; - -/// Type tag for array element specialization -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -pub enum TypeTag { - Int = 0, - Float = 1, - Bool = 2, - Object = 3, -} - -impl TypeTag { - pub fn from_u8(v: u8) -> Option { - match v { - 0 => Some(Self::Int), - 1 => Some(Self::Float), - 2 => Some(Self::Bool), - 3 => Some(Self::Object), - _ => None, - } - } -} - -#[derive(Debug, Clone)] -pub enum ArrayData { - Ints(Box<[i64]>), - Floats(Box<[f64]>), - Bools(Box<[u8]>), - Objects(Box<[Value]>), -} - -impl ArrayData { - pub fn len(&self) -> usize { - match self { - Self::Ints(b) => b.len(), - Self::Floats(b) => b.len(), - Self::Bools(b) => b.len(), - Self::Objects(b) => b.len(), - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn type_tag(&self) -> TypeTag { - match self { - Self::Ints(_) => TypeTag::Int, - Self::Floats(_) => TypeTag::Float, - Self::Bools(_) => TypeTag::Bool, - Self::Objects(_) => TypeTag::Object, - } - } - - pub fn as_ints(&self) -> Option<&[i64]> { - match self { - Self::Ints(b) => Some(b), - _ => None, - } - } - pub fn as_ints_mut(&mut self) -> Option<&mut [i64]> { - match self { - Self::Ints(b) => Some(b), - _ => None, - } - } - pub fn as_floats(&self) -> Option<&[f64]> { - match self { - Self::Floats(b) => Some(b), - _ => None, - } - } - pub fn as_floats_mut(&mut self) -> Option<&mut [f64]> { - match self { - Self::Floats(b) => Some(b), - _ => None, - } - } - pub fn as_bools(&self) -> Option<&[u8]> { - match self { - Self::Bools(b) => Some(b), - _ => None, - } - } - pub fn as_bools_mut(&mut self) -> Option<&mut [u8]> { - match self { - 
Self::Bools(b) => Some(b), - _ => None, - } - } - pub fn as_objects(&self) -> Option<&[Value]> { - match self { - Self::Objects(b) => Some(b), - _ => None, - } - } - pub fn as_objects_mut(&mut self) -> Option<&mut [Value]> { - match self { - Self::Objects(b) => Some(b), - _ => None, - } - } -} - -#[derive(Debug, Clone)] -pub struct AelysArray { - pub data: ArrayData, -} - -impl AelysArray { - pub fn new_ints(len: usize) -> Self { - Self { - data: ArrayData::Ints(vec![0i64; len].into_boxed_slice()), - } - } - pub fn new_floats(len: usize) -> Self { - Self { - data: ArrayData::Floats(vec![0.0f64; len].into_boxed_slice()), - } - } - pub fn new_bools(len: usize) -> Self { - Self { - data: ArrayData::Bools(vec![0u8; len].into_boxed_slice()), - } - } - pub fn new_objects(len: usize) -> Self { - Self { - data: ArrayData::Objects(vec![Value::null(); len].into_boxed_slice()), - } - } - - pub fn from_ints(data: Vec) -> Self { - Self { - data: ArrayData::Ints(data.into_boxed_slice()), - } - } - pub fn from_floats(data: Vec) -> Self { - Self { - data: ArrayData::Floats(data.into_boxed_slice()), - } - } - pub fn from_bools(data: Vec) -> Self { - Self { - data: ArrayData::Bools( - data.iter() - .map(|&b| b as u8) - .collect::>() - .into_boxed_slice(), - ), - } - } - pub fn from_objects(data: Vec) -> Self { - Self { - data: ArrayData::Objects(data.into_boxed_slice()), - } - } - - pub fn len(&self) -> usize { - self.data.len() - } - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - pub fn type_tag(&self) -> TypeTag { - self.data.type_tag() - } - - pub fn get(&self, index: usize) -> Option { - if index >= self.len() { - return None; - } - Some(match &self.data { - ArrayData::Ints(b) => Value::int(b[index]), - ArrayData::Floats(b) => Value::float(b[index]), - ArrayData::Bools(b) => Value::bool(b[index] != 0), - ArrayData::Objects(b) => b[index], - }) - } - - pub fn set(&mut self, index: usize, value: Value) -> bool { - if index >= self.len() { - return false; - } - match 
&mut self.data { - ArrayData::Ints(b) => { - if let Some(v) = value.as_int() { - b[index] = v; - true - } else { - false - } - } - ArrayData::Floats(b) => { - if let Some(v) = value.as_float() { - b[index] = v; - true - } else { - false - } - } - ArrayData::Bools(b) => { - if let Some(v) = value.as_bool() { - b[index] = v as u8; - true - } else { - false - } - } - ArrayData::Objects(b) => { - b[index] = value; - true - } - } - } - - pub fn size_bytes(&self) -> usize { - std::mem::size_of::() - + match &self.data { - ArrayData::Ints(b) => b.len() * 8, - ArrayData::Floats(b) => b.len() * 8, - ArrayData::Bools(b) => b.len(), - ArrayData::Objects(b) => b.len() * 8, - } - } -} diff --git a/bytecode/src/object/closure.rs b/bytecode/src/object/closure.rs deleted file mode 100644 index c472571..0000000 --- a/bytecode/src/object/closure.rs +++ /dev/null @@ -1,53 +0,0 @@ -use super::GcRef; - -/// Cached metadata for faster closure execution -#[derive(Debug, Clone)] -pub struct ClosureCache { - pub bytecode_ptr: *const u32, - pub bytecode_len: usize, - pub constants_ptr: *const crate::value::Value, - pub constants_len: usize, - pub arity: u8, - pub num_registers: u8, -} - -/// A closure wraps a function with its captured upvalues. 
-#[derive(Debug, Clone)] -pub struct AelysClosure { - pub function: GcRef, - pub upvalues: Vec, - pub bytecode_ptr: *const u32, - pub bytecode_len: usize, - pub constants_ptr: *const crate::value::Value, - pub constants_len: usize, - pub arity: u8, - pub num_registers: u8, -} - -impl AelysClosure { - pub fn new(function: GcRef, upvalues: Vec) -> Self { - Self { - function, - upvalues, - bytecode_ptr: std::ptr::null(), - bytecode_len: 0, - constants_ptr: std::ptr::null(), - constants_len: 0, - arity: 0, - num_registers: 0, - } - } - - pub fn with_cache(function: GcRef, upvalues: Vec, cache: ClosureCache) -> Self { - Self { - function, - upvalues, - bytecode_ptr: cache.bytecode_ptr, - bytecode_len: cache.bytecode_len, - constants_ptr: cache.constants_ptr, - constants_len: cache.constants_len, - arity: cache.arity, - num_registers: cache.num_registers, - } - } -} diff --git a/bytecode/src/object/function.rs b/bytecode/src/object/function.rs deleted file mode 100644 index 9ad6207..0000000 --- a/bytecode/src/object/function.rs +++ /dev/null @@ -1,29 +0,0 @@ -use crate::Function; - -/// A wrapped bytecode function for the GC heap. 
-#[derive(Debug, Clone)] -pub struct AelysFunction { - pub function: Function, - pub verified: bool, -} - -impl AelysFunction { - pub fn new(function: Function) -> Self { - Self { - function, - verified: false, - } - } - - pub fn name(&self) -> Option<&str> { - self.function.name.as_deref() - } - - pub fn arity(&self) -> u8 { - self.function.arity - } - - pub fn num_registers(&self) -> u8 { - self.function.num_registers - } -} diff --git a/bytecode/src/object/gc_object.rs b/bytecode/src/object/gc_object.rs deleted file mode 100644 index 4e09b43..0000000 --- a/bytecode/src/object/gc_object.rs +++ /dev/null @@ -1,16 +0,0 @@ -use super::ObjectKind; - -#[derive(Debug)] -pub struct GcObject { - pub marked: bool, // for mark-sweep - pub kind: ObjectKind, -} - -impl GcObject { - pub fn new(kind: ObjectKind) -> Self { - Self { - marked: false, - kind, - } - } -} diff --git a/bytecode/src/object/gc_ref.rs b/bytecode/src/object/gc_ref.rs deleted file mode 100644 index 74ba1c5..0000000 --- a/bytecode/src/object/gc_ref.rs +++ /dev/null @@ -1,25 +0,0 @@ -/// Reference to a GC object (index into the heap). -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct GcRef(usize); - -impl GcRef { - pub fn new(index: usize) -> Self { - Self(index) - } - - pub fn index(&self) -> usize { - self.0 - } -} - -impl From for GcRef { - fn from(index: usize) -> Self { - Self(index) - } -} - -impl From for usize { - fn from(gc_ref: GcRef) -> Self { - gc_ref.0 - } -} diff --git a/bytecode/src/object/kinds.rs b/bytecode/src/object/kinds.rs deleted file mode 100644 index f145e81..0000000 --- a/bytecode/src/object/kinds.rs +++ /dev/null @@ -1,15 +0,0 @@ -use super::{ - AelysArray, AelysClosure, AelysFunction, AelysString, AelysUpvalue, AelysVec, NativeFunction, -}; - -/// The different types of GC-managed objects. 
-#[derive(Debug)] -pub enum ObjectKind { - String(AelysString), - Function(AelysFunction), - Native(NativeFunction), - Upvalue(AelysUpvalue), - Closure(AelysClosure), - Array(AelysArray), - Vec(AelysVec), -} diff --git a/bytecode/src/object/mod.rs b/bytecode/src/object/mod.rs deleted file mode 100644 index d1d5db7..0000000 --- a/bytecode/src/object/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -// heap object types - -mod array; -mod closure; -mod function; -mod gc_object; -mod gc_ref; -mod kinds; -mod native; -mod string; -mod upvalue; -mod vec; - -pub use array::{AelysArray, ArrayData, TypeTag}; -pub use closure::{AelysClosure, ClosureCache}; -pub use function::AelysFunction; -pub use gc_object::GcObject; -pub use gc_ref::GcRef; -pub use kinds::ObjectKind; -pub use native::NativeFunction; -pub use string::AelysString; -pub use upvalue::{AelysUpvalue, UpvalueLocation}; -pub use vec::AelysVec; diff --git a/bytecode/src/object/native.rs b/bytecode/src/object/native.rs deleted file mode 100644 index 958ef04..0000000 --- a/bytecode/src/object/native.rs +++ /dev/null @@ -1,15 +0,0 @@ -// just metadata, actual execution logic is in runtime crate -#[derive(Clone, Debug)] -pub struct NativeFunction { - pub name: String, - pub arity: u8, -} - -impl NativeFunction { - pub fn new(name: impl Into, arity: u8) -> Self { - Self { - name: name.into(), - arity, - } - } -} diff --git a/bytecode/src/object/string.rs b/bytecode/src/object/string.rs deleted file mode 100644 index 55f5430..0000000 --- a/bytecode/src/object/string.rs +++ /dev/null @@ -1,67 +0,0 @@ -use std::hash::{Hash, Hasher}; - -// immutable string with cached hash (for fast comparison and interning) -#[derive(Debug, Clone)] -pub struct AelysString { - hash: u64, - data: Box<[u8]>, -} - -impl AelysString { - pub fn new(s: &str) -> Self { - let data = s.as_bytes().to_vec().into_boxed_slice(); - Self { - hash: Self::compute_hash(&data), - data, - } - } - - pub fn from_bytes(bytes: Vec) -> Self { - let hash = 
Self::compute_hash(&bytes); - Self { - hash, - data: bytes.into_boxed_slice(), - } - } - - pub fn hash(&self) -> u64 { - self.hash - } - pub fn as_bytes(&self) -> &[u8] { - &self.data - } - pub fn len(&self) -> usize { - self.data.len() - } - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - // SAFETY: we only store valid utf-8 - pub fn as_str(&self) -> &str { - unsafe { std::str::from_utf8_unchecked(&self.data) } - } - - fn compute_hash(bytes: &[u8]) -> u64 { - let mut h = std::collections::hash_map::DefaultHasher::new(); - bytes.hash(&mut h); - h.finish() - } -} - -impl PartialEq for AelysString { - fn eq(&self, other: &Self) -> bool { - if self.hash != other.hash { - return false; - } - self.data == other.data - } -} - -impl Eq for AelysString {} - -impl Hash for AelysString { - fn hash(&self, state: &mut H) { - self.hash.hash(state); - } -} diff --git a/bytecode/src/object/upvalue.rs b/bytecode/src/object/upvalue.rs deleted file mode 100644 index d9d4ccb..0000000 --- a/bytecode/src/object/upvalue.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::value::Value; - -/// An upvalue captures a variable from an enclosing scope. -#[derive(Debug, Clone)] -pub struct AelysUpvalue { - pub location: UpvalueLocation, -} - -/// Where an upvalue's value is stored. -#[derive(Debug, Clone)] -pub enum UpvalueLocation { - Open { frame_base: usize, register: u8 }, - Closed(Value), -} - -impl AelysUpvalue { - pub fn new_open(frame_base: usize, register: u8) -> Self { - Self { - location: UpvalueLocation::Open { - frame_base, - register, - }, - } - } - - pub fn is_open(&self) -> bool { - matches!(self.location, UpvalueLocation::Open { .. 
}) - } - - pub fn close(&mut self, value: Value) { - self.location = UpvalueLocation::Closed(value); - } -} diff --git a/bytecode/src/object/vec.rs b/bytecode/src/object/vec.rs deleted file mode 100644 index d2d4141..0000000 --- a/bytecode/src/object/vec.rs +++ /dev/null @@ -1,320 +0,0 @@ -use super::array::{AelysArray, ArrayData, TypeTag}; -use crate::value::Value; - -#[derive(Debug, Clone)] -pub enum VecData { - Ints(Vec), - Floats(Vec), - Bools(Vec), - Objects(Vec), -} - -impl VecData { - pub fn len(&self) -> usize { - match self { - Self::Ints(v) => v.len(), - Self::Floats(v) => v.len(), - Self::Bools(v) => v.len(), - Self::Objects(v) => v.len(), - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn capacity(&self) -> usize { - match self { - Self::Ints(v) => v.capacity(), - Self::Floats(v) => v.capacity(), - Self::Bools(v) => v.capacity(), - Self::Objects(v) => v.capacity(), - } - } - - pub fn type_tag(&self) -> TypeTag { - match self { - Self::Ints(_) => TypeTag::Int, - Self::Floats(_) => TypeTag::Float, - Self::Bools(_) => TypeTag::Bool, - Self::Objects(_) => TypeTag::Object, - } - } - - pub fn as_ints(&self) -> Option<&[i64]> { - match self { - Self::Ints(v) => Some(v), - _ => None, - } - } - pub fn as_ints_mut(&mut self) -> Option<&mut Vec> { - match self { - Self::Ints(v) => Some(v), - _ => None, - } - } - pub fn as_floats(&self) -> Option<&[f64]> { - match self { - Self::Floats(v) => Some(v), - _ => None, - } - } - pub fn as_floats_mut(&mut self) -> Option<&mut Vec> { - match self { - Self::Floats(v) => Some(v), - _ => None, - } - } - pub fn as_bools(&self) -> Option<&[u8]> { - match self { - Self::Bools(v) => Some(v), - _ => None, - } - } - pub fn as_bools_mut(&mut self) -> Option<&mut Vec> { - match self { - Self::Bools(v) => Some(v), - _ => None, - } - } - pub fn as_objects(&self) -> Option<&[Value]> { - match self { - Self::Objects(v) => Some(v), - _ => None, - } - } - pub fn as_objects_mut(&mut self) -> Option<&mut Vec> { - 
match self { - Self::Objects(v) => Some(v), - _ => None, - } - } -} - -#[derive(Debug, Clone)] -pub struct AelysVec { - pub data: VecData, -} - -impl AelysVec { - pub fn new_ints() -> Self { - Self { - data: VecData::Ints(Vec::new()), - } - } - pub fn new_floats() -> Self { - Self { - data: VecData::Floats(Vec::new()), - } - } - pub fn new_bools() -> Self { - Self { - data: VecData::Bools(Vec::new()), - } - } - pub fn new_objects() -> Self { - Self { - data: VecData::Objects(Vec::new()), - } - } - - pub fn with_capacity_ints(cap: usize) -> Self { - Self { - data: VecData::Ints(Vec::with_capacity(cap)), - } - } - pub fn with_capacity_floats(cap: usize) -> Self { - Self { - data: VecData::Floats(Vec::with_capacity(cap)), - } - } - pub fn with_capacity_bools(cap: usize) -> Self { - Self { - data: VecData::Bools(Vec::with_capacity(cap)), - } - } - pub fn with_capacity_objects(cap: usize) -> Self { - Self { - data: VecData::Objects(Vec::with_capacity(cap)), - } - } - - pub fn from_ints(data: Vec) -> Self { - Self { - data: VecData::Ints(data), - } - } - pub fn from_floats(data: Vec) -> Self { - Self { - data: VecData::Floats(data), - } - } - pub fn from_bools(data: Vec) -> Self { - Self { - data: VecData::Bools(data.iter().map(|&b| b as u8).collect()), - } - } - pub fn from_objects(data: Vec) -> Self { - Self { - data: VecData::Objects(data), - } - } - - pub fn len(&self) -> usize { - self.data.len() - } - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - pub fn capacity(&self) -> usize { - self.data.capacity() - } - pub fn type_tag(&self) -> TypeTag { - self.data.type_tag() - } - - pub fn get(&self, index: usize) -> Option { - if index >= self.len() { - return None; - } - Some(match &self.data { - VecData::Ints(v) => Value::int(v[index]), - VecData::Floats(v) => Value::float(v[index]), - VecData::Bools(v) => Value::bool(v[index] != 0), - VecData::Objects(v) => v[index], - }) - } - - pub fn set(&mut self, index: usize, value: Value) -> bool { - if index >= 
self.len() { - return false; - } - match &mut self.data { - VecData::Ints(v) => { - if let Some(val) = value.as_int() { - v[index] = val; - true - } else { - false - } - } - VecData::Floats(v) => { - if let Some(val) = value.as_float() { - v[index] = val; - true - } else { - false - } - } - VecData::Bools(v) => { - if let Some(val) = value.as_bool() { - v[index] = val as u8; - true - } else { - false - } - } - VecData::Objects(v) => { - v[index] = value; - true - } - } - } - - pub fn push(&mut self, value: Value) -> bool { - match &mut self.data { - VecData::Ints(v) => { - if let Some(val) = value.as_int() { - v.push(val); - true - } else { - false - } - } - VecData::Floats(v) => { - if let Some(val) = value.as_float() { - v.push(val); - true - } else { - false - } - } - VecData::Bools(v) => { - if let Some(val) = value.as_bool() { - v.push(val as u8); - true - } else { - false - } - } - VecData::Objects(v) => { - v.push(value); - true - } - } - } - - pub fn pop(&mut self) -> Option { - match &mut self.data { - VecData::Ints(v) => v.pop().map(Value::int), - VecData::Floats(v) => v.pop().map(Value::float), - VecData::Bools(v) => v.pop().map(|b| Value::bool(b != 0)), - VecData::Objects(v) => v.pop(), - } - } - - pub fn reserve(&mut self, additional: usize) { - match &mut self.data { - VecData::Ints(v) => v.reserve(additional), - VecData::Floats(v) => v.reserve(additional), - VecData::Bools(v) => v.reserve(additional), - VecData::Objects(v) => v.reserve(additional), - } - } - - pub fn clear(&mut self) { - match &mut self.data { - VecData::Ints(v) => v.clear(), - VecData::Floats(v) => v.clear(), - VecData::Bools(v) => v.clear(), - VecData::Objects(v) => v.clear(), - } - } - - pub fn shrink_to_fit(&mut self) { - match &mut self.data { - VecData::Ints(v) => v.shrink_to_fit(), - VecData::Floats(v) => v.shrink_to_fit(), - VecData::Bools(v) => v.shrink_to_fit(), - VecData::Objects(v) => v.shrink_to_fit(), - } - } - - pub fn to_array(&self) -> AelysArray { - AelysArray { - 
data: match &self.data { - VecData::Ints(v) => ArrayData::Ints(v.clone().into_boxed_slice()), - VecData::Floats(v) => ArrayData::Floats(v.clone().into_boxed_slice()), - VecData::Bools(v) => ArrayData::Bools(v.clone().into_boxed_slice()), - VecData::Objects(v) => ArrayData::Objects(v.clone().into_boxed_slice()), - }, - } - } - - pub fn objects(&self) -> Option<&[Value]> { - match &self.data { - VecData::Objects(v) => Some(v), - _ => None, - } - } - - pub fn size_bytes(&self) -> usize { - std::mem::size_of::() - + match &self.data { - VecData::Ints(v) => v.capacity() * 8, - VecData::Floats(v) => v.capacity() * 8, - VecData::Bools(v) => v.capacity(), - VecData::Objects(v) => v.capacity() * 8, - } - } -} diff --git a/bytecode/src/value/accessors.rs b/bytecode/src/value/accessors.rs deleted file mode 100644 index d3739a6..0000000 --- a/bytecode/src/value/accessors.rs +++ /dev/null @@ -1,55 +0,0 @@ -use super::{PAYLOAD_MASK, QNAN, TAG_MASK, TAG_NESTED_FN, Value}; - -impl Value { - pub fn as_int(&self) -> Option { - if !self.is_int() { - return None; - } - let payload = self.0 & PAYLOAD_MASK; - Some(((payload << 16) as i64) >> 16) // sign-extend from 48 bits - } - - pub fn as_float(&self) -> Option { - self.is_float().then(|| f64::from_bits(self.0)) - } - - pub fn as_bool(&self) -> Option { - self.is_bool().then_some((self.0 & 1) != 0) - } - - pub fn as_ptr(&self) -> Option { - self.is_ptr().then_some((self.0 & PAYLOAD_MASK) as usize) - } - - /// Check if this is a nested function marker and return the index if so. 
- pub fn as_nested_fn_marker(&self) -> Option { - if (self.0 & (QNAN | TAG_MASK)) == (QNAN | TAG_NESTED_FN) { - Some((self.0 & PAYLOAD_MASK) as usize) - } else { - None - } - } - - #[inline(always)] - pub fn raw_bits(&self) -> u64 { - self.0 - } - - #[inline(always)] - pub fn from_raw(bits: u64) -> Self { - Self(bits) - } - - // unchecked variants for type-specialized opcodes (hot paths) - #[inline(always)] - pub fn as_int_unchecked(&self) -> i64 { - debug_assert!(self.is_int(), "type confusion: not an int"); - ((self.0 & PAYLOAD_MASK) << 16) as i64 >> 16 - } - - #[inline(always)] - pub fn as_float_unchecked(&self) -> f64 { - debug_assert!(self.is_float(), "type confusion: not a float"); - f64::from_bits(self.0) - } -} diff --git a/bytecode/src/value/checks.rs b/bytecode/src/value/checks.rs deleted file mode 100644 index da794e8..0000000 --- a/bytecode/src/value/checks.rs +++ /dev/null @@ -1,30 +0,0 @@ -use super::{QNAN, TAG_BOOL, TAG_INT, TAG_MASK, TAG_NESTED_FN, TAG_NULL, TAG_PTR, Value}; - -impl Value { - // floats are the only values that don't have QNAN set (except actual NaN which we canonicalize) - // Canonical NaN (TAG_NAN) is also a float. TAG_NESTED_FN is not a float. 
- pub fn is_float(&self) -> bool { - if (self.0 & QNAN) != QNAN { - return true; - } - let tag = self.0 & TAG_MASK; - tag != TAG_PTR - && tag != TAG_INT - && tag != TAG_BOOL - && tag != TAG_NULL - && tag != TAG_NESTED_FN - } - - pub fn is_int(&self) -> bool { - (self.0 & (QNAN | TAG_MASK)) == (QNAN | TAG_INT) - } - pub fn is_bool(&self) -> bool { - (self.0 & (QNAN | TAG_MASK)) == (QNAN | TAG_BOOL) - } - pub fn is_null(&self) -> bool { - (self.0 & (QNAN | TAG_MASK)) == (QNAN | TAG_NULL) - } - pub fn is_ptr(&self) -> bool { - (self.0 & (QNAN | TAG_MASK)) == (QNAN | TAG_PTR) - } -} diff --git a/bytecode/src/value/constructors.rs b/bytecode/src/value/constructors.rs deleted file mode 100644 index f4e67aa..0000000 --- a/bytecode/src/value/constructors.rs +++ /dev/null @@ -1,50 +0,0 @@ -use super::{ - CANONICAL_NAN, IntegerOverflowError, PAYLOAD_MASK, QNAN, TAG_BOOL, TAG_INT, TAG_NESTED_FN, - TAG_NULL, TAG_PTR, Value, -}; - -impl Value { - // wraps silently on overflow - caller should use int_checked if they care - #[inline(always)] - pub fn int(n: i64) -> Self { - Self(QNAN | TAG_INT | ((n as u64) & PAYLOAD_MASK)) - } - - #[inline(always)] - pub fn int_checked(n: i64) -> Result { - // sign-extend from 48 bits and check if it matches - if n == (n << 16) >> 16 { - Ok(Self::int(n)) - } else { - Err(IntegerOverflowError { value: n }) - } - } - - pub fn float(n: f64) -> Self { - if n.is_nan() { - Self(CANONICAL_NAN) - } else { - Self(n.to_bits()) - } - } - - pub fn bool(b: bool) -> Self { - Self(QNAN | TAG_BOOL | (b as u64)) - } - - pub fn null() -> Self { - Self(QNAN | TAG_NULL) - } - - pub fn ptr(p: usize) -> Self { - debug_assert!(p <= PAYLOAD_MASK as usize, "ptr too big for NaN boxing"); - Self(QNAN | TAG_PTR | (p as u64)) - } - - /// create a nested function marker for use in constants array. - /// this uses a dedicated tag that can't collide with heap pointers. 
- pub fn nested_fn_marker(idx: usize) -> Self { - debug_assert!(idx <= PAYLOAD_MASK as usize, "nested fn index too big"); - Self(QNAN | TAG_NESTED_FN | (idx as u64)) - } -} diff --git a/bytecode/src/value/docs.md b/bytecode/src/value/docs.md deleted file mode 100644 index fb545dc..0000000 --- a/bytecode/src/value/docs.md +++ /dev/null @@ -1,10 +0,0 @@ -# NaN-Boxed Value Representation - -This module implements Aelys values using NaN-boxing to pack primitives and object -pointers into a single 64-bit word. - -- Integers are limited to 48 bits (±2^47). -- Floats are stored as raw IEEE-754 bits. -- Tagged values encode int/bool/null/ptr in the NaN payload. - -Use `Value::int_checked` for user-provided integers to avoid silent wraparound. diff --git a/bytecode/src/value/fmt.rs b/bytecode/src/value/fmt.rs deleted file mode 100644 index f50930b..0000000 --- a/bytecode/src/value/fmt.rs +++ /dev/null @@ -1,69 +0,0 @@ -use super::Value; - -impl std::fmt::Debug for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.is_null() { - write!(f, "null") - } else if let Some(b) = self.as_bool() { - write!(f, "{}", b) - } else if let Some(n) = self.as_int() { - write!(f, "{}", n) - } else if let Some(n) = self.as_float() { - if n.fract() == 0.0 { - write!(f, "{}.0", n) - } else { - write!(f, "{}", n) - } - } else if let Some(p) = self.as_ptr() { - write!(f, "", p) - } else { - write!(f, "", self.0) - } - } -} - -impl std::fmt::Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.is_null() { - write!(f, "null") - } else if let Some(b) = self.as_bool() { - write!(f, "{}", b) - } else if let Some(n) = self.as_int() { - write!(f, "{}", n) - } else if let Some(n) = self.as_float() { - if n.fract() == 0.0 { - write!(f, "{}.0", n) - } else { - write!(f, "{}", n) - } - } else if let Some(_p) = self.as_ptr() { - write!(f, "") - } else { - write!(f, "") - } - } -} - -impl Default for Value { - fn default() -> 
Self { - Self::null() - } -} - -impl PartialEq for Value { - fn eq(&self, other: &Self) -> bool { - if self.0 == other.0 { - return true; - } - if self.is_float() && other.is_float() { - return self.as_float() == other.as_float(); - } - if let (Some(a), Some(b)) = (self.as_int(), other.as_float()) { - return (a as f64) == b; - } - if let (Some(a), Some(b)) = (self.as_float(), other.as_int()) { - return a == (b as f64); - } - false - } -} diff --git a/bytecode/src/value/mod.rs b/bytecode/src/value/mod.rs deleted file mode 100644 index beddece..0000000 --- a/bytecode/src/value/mod.rs +++ /dev/null @@ -1,38 +0,0 @@ -#![doc = include_str!("docs.md")] - -mod accessors; -mod checks; -mod constructors; -mod fmt; -mod util; - -#[derive(Debug, Clone)] -pub struct IntegerOverflowError { - pub value: i64, -} - -// NaN-boxing: all values fit in 64 bits. Floats are stored directly, -// everything else uses the NaN space (quiet NaN has 51 bits of payload). -// This approach is used by LuaJIT, JavaScriptCore, etc. - proven fast. -// -// The 48-bit integer limit is a tradeoff: 64-bit ints would need heap -// allocation or a different encoding. ±140 trillion should be enough... 
-#[derive(Clone, Copy)] -pub struct Value(u64); - -// Tag bits are in bits 48-50, payload in bits 0-47 -const QNAN: u64 = 0x7FF8_0000_0000_0000; -const TAG_MASK: u64 = 0x0007_0000_0000_0000; -const TAG_PTR: u64 = 0x0000_0000_0000_0000; -const TAG_INT: u64 = 0x0001_0000_0000_0000; -const TAG_BOOL: u64 = 0x0002_0000_0000_0000; -const TAG_NULL: u64 = 0x0003_0000_0000_0000; -const TAG_NAN: u64 = 0x0004_0000_0000_0000; -const TAG_NESTED_FN: u64 = 0x0005_0000_0000_0000; // marker for nested functions in constants -const PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; -const CANONICAL_NAN: u64 = QNAN | TAG_NAN | 1; - -impl Value { - pub const INT_MIN: i64 = -(1i64 << 47); - pub const INT_MAX: i64 = (1i64 << 47) - 1; -} diff --git a/bytecode/src/value/util.rs b/bytecode/src/value/util.rs deleted file mode 100644 index 3f05959..0000000 --- a/bytecode/src/value/util.rs +++ /dev/null @@ -1,35 +0,0 @@ -use super::Value; - -impl Value { - /// Check if value is truthy. - pub fn is_truthy(&self) -> bool { - if self.is_null() { - false - } else if let Some(b) = self.as_bool() { - b - } else if let Some(n) = self.as_int() { - n != 0 - } else if let Some(n) = self.as_float() { - n != 0.0 - } else { - true - } - } - - /// Get type name for error messages. 
- pub fn type_name(&self) -> &'static str { - if self.is_null() { - "Null" - } else if self.is_bool() { - "Bool" - } else if self.is_int() { - "Int" - } else if self.is_float() { - "Float" - } else if self.is_ptr() { - "Object" - } else { - "Unknown" - } - } -} diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d215c87..267e099 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -4,22 +4,13 @@ version.workspace = true edition = "2024" [dependencies] -aelys = { path = "../aelys" } -aelys-backend = { path = "../backend" } -aelys-bytecode = { path = "../bytecode" } aelys-common = { path = "../common" } aelys-driver = { path = "../driver" } -aelys-frontend = { path = "../frontend" } aelys-opt = { path = "../opt" } -aelys-runtime = { path = "../runtime" } -aelys-sema = { path = "../sema" } -aelys-syntax = { path = "../syntax" } aelys-air = { path = "../air" } -aelys-modules = { path = "../modules" } -aelys-native = { path = "../native" } semver = "1.0" [dev-dependencies] [lib] -doctest = false \ No newline at end of file +doctest = false diff --git a/cli/src/cli/args/mod.rs b/cli/src/cli/args/mod.rs index 53f9706..9633cf7 100644 --- a/cli/src/cli/args/mod.rs +++ b/cli/src/cli/args/mod.rs @@ -6,31 +6,32 @@ use aelys_opt::OptimizationLevel; pub use parse::parse_args; pub use usage::usage; +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ColorChoice { + Auto, + Always, + Never, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum Command { Help, - Run { - path: String, - program_args: Vec, - }, Compile { path: String, output: Option, emit_air: bool, + emit_llvm_ir: bool, }, - Asm { - path: String, - output: Option, - stdout: bool, + Explain { + code: String, }, - Repl, Version, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParsedArgs { pub command: Command, - pub vm_args: Vec, pub opt_level: OptimizationLevel, pub warning_flags: Vec, + pub color: ColorChoice, } diff --git a/cli/src/cli/args/parse.rs b/cli/src/cli/args/parse.rs index f57abd1..d8f9dbe 100644 --- 
a/cli/src/cli/args/parse.rs +++ b/cli/src/cli/args/parse.rs @@ -1,14 +1,12 @@ // hand-rolled recursive descent, clap felt overkill for this -use super::{Command, ParsedArgs}; +use super::{ColorChoice, Command, ParsedArgs}; use aelys_opt::OptimizationLevel; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum CommandName { - Run, Compile, - Asm, - Repl, + Explain, Help, Version, } @@ -23,13 +21,13 @@ struct Parser<'a> { index: usize, command: Option, path: Option, - program_args: Vec, - vm_args: Vec, opt_level: OptimizationLevel, output: Option, - stdout: bool, emit_air: bool, + emit_llvm_ir: bool, warning_flags: Vec, + color: ColorChoice, + explain_code: Option, } impl<'a> Parser<'a> { @@ -40,13 +38,13 @@ impl<'a> Parser<'a> { index: 0, command: None, path: None, - program_args: Vec::new(), - vm_args: Vec::new(), opt_level: OptimizationLevel::Standard, output: None, - stdout: false, emit_air: false, + emit_llvm_ir: false, warning_flags: Vec::new(), + color: ColorChoice::Auto, + explain_code: None, } } @@ -74,8 +72,7 @@ impl<'a> Parser<'a> { continue; } - if let Some((vm_arg, consumed_next)) = self.parse_vm_arg(token_str)? { - self.vm_args.push(vm_arg); + if let Some(consumed_next) = self.parse_output_option(token_str)? { self.advance(); if consumed_next { self.advance(); @@ -83,22 +80,67 @@ impl<'a> Parser<'a> { continue; } - if let Some(consumed_next) = self.parse_output_option(token_str)? 
{ + if token_str == "--emit-air" { + self.emit_air = true; self.advance(); - if consumed_next { - self.advance(); - } continue; } - if self.is_stdout(token_str) { - self.stdout = true; + if token_str == "--emit-llvm-ir" { + self.emit_llvm_ir = true; self.advance(); continue; } - if token_str == "--emit-air" { - self.emit_air = true; + if token_str == "--no-color" { + self.color = ColorChoice::Never; + self.advance(); + continue; + } + + if let Some(rest) = token_str.strip_prefix("--color=") { + self.color = match rest { + "auto" => ColorChoice::Auto, + "always" => ColorChoice::Always, + "never" => ColorChoice::Never, + _ => { + return Err(format!( + "invalid --color value: {} (expected auto, always, or never)", + rest + )); + } + }; + self.advance(); + continue; + } + + if token_str == "--color" { + let next = self.peek_next().ok_or_else(|| { + "--color requires a value (auto, always, or never)".to_string() + })?; + self.color = match next { + "auto" => ColorChoice::Auto, + "always" => ColorChoice::Always, + "never" => ColorChoice::Never, + _ => { + return Err(format!( + "invalid --color value: {} (expected auto, always, or never)", + next + )); + } + }; + self.advance(); + self.advance(); + continue; + } + + if token_str == "--explain" { + let next = self + .peek_next() + .ok_or_else(|| "--explain requires an error code (e.g., E0401)".to_string())?; + self.explain_code = Some(next.to_string()); + self.command = Some(CommandName::Explain); + self.advance(); self.advance(); continue; } @@ -121,11 +163,6 @@ impl<'a> Parser<'a> { } if token_str.starts_with('-') { - if matches!(self.command, Some(CommandName::Run)) && self.path.is_some() { - self.program_args.push(token); - self.advance(); - continue; - } return Err(format!("unknown flag: {}", token_str)); } @@ -141,43 +178,16 @@ impl<'a> Parser<'a> { None => Command::Help, Some(CommandName::Help) => Command::Help, Some(CommandName::Version) => Command::Version, - Some(CommandName::Repl) => { - if self.path.is_some() 
|| !self.program_args.is_empty() { - return Err("repl does not accept a path or arguments".to_string()); - } - if self.output.is_some() || self.stdout { - return Err("repl does not accept output flags".to_string()); - } - if self.emit_air { - return Err("--emit-air is only supported for compile".to_string()); - } - Command::Repl - } - Some(CommandName::Run) => { - let path = self - .path - .ok_or_else(|| "missing file for run".to_string())?; - if self.output.is_some() || self.stdout { - return Err("output flags are only supported for compile or asm".to_string()); - } - if self.emit_air { - return Err("--emit-air is only supported for compile".to_string()); - } - Command::Run { - path, - program_args: self.program_args, - } + Some(CommandName::Explain) => { + let code = self + .explain_code + .ok_or_else(|| "--explain requires an error code".to_string())?; + Command::Explain { code } } Some(CommandName::Compile) => { let path = self .path .ok_or_else(|| "missing file for compile".to_string())?; - if !self.program_args.is_empty() { - return Err("compile does not accept extra arguments".to_string()); - } - if self.stdout { - return Err("compile does not support --stdout".to_string()); - } if self.emit_air && self.output.is_some() { return Err("--emit-air and --output cannot be combined".to_string()); } @@ -185,64 +195,44 @@ impl<'a> Parser<'a> { path, output: self.output, emit_air: self.emit_air, - } - } - Some(CommandName::Asm) => { - let path = self - .path - .ok_or_else(|| "missing file for asm".to_string())?; - if !self.program_args.is_empty() { - return Err("asm does not accept extra arguments".to_string()); - } - if self.emit_air { - return Err("--emit-air is only supported for compile".to_string()); - } - Command::Asm { - path, - output: self.output, - stdout: self.stdout, + emit_llvm_ir: self.emit_llvm_ir, } } }; Ok(ParsedArgs { command, - vm_args: self.vm_args, opt_level: self.opt_level, warning_flags: self.warning_flags, + color: self.color, }) } fn 
finish_help(self) -> ParsedArgs { ParsedArgs { command: Command::Help, - vm_args: Vec::new(), opt_level: OptimizationLevel::Standard, warning_flags: Vec::new(), + color: self.color, } } fn finish_version(self) -> ParsedArgs { ParsedArgs { command: Command::Version, - vm_args: Vec::new(), opt_level: OptimizationLevel::Standard, warning_flags: Vec::new(), + color: self.color, } } fn consume_positional(&mut self, token: &str) -> Result<(), String> { match self.command { None => { - self.command = Some(CommandName::Run); - self.path = Some(token.to_string()); - } - Some(CommandName::Run) => { - if self.path.is_none() { - self.path = Some(token.to_string()); - } else { - self.program_args.push(token.to_string()); - } + return Err(format!( + "unexpected argument: {}. Use 'aelys compile ' to compile.", + token + )); } Some(CommandName::Compile) => { if self.path.is_none() { @@ -251,16 +241,13 @@ impl<'a> Parser<'a> { return Err(format!("unexpected argument for compile: {}", token)); } } - Some(CommandName::Asm) => { - if self.path.is_none() { - self.path = Some(token.to_string()); + Some(CommandName::Explain) => { + if self.explain_code.is_none() { + self.explain_code = Some(token.to_string()); } else { - return Err(format!("unexpected argument for asm: {}", token)); + return Err(format!("unexpected argument for explain: {}", token)); } } - Some(CommandName::Repl) => { - return Err(format!("unexpected argument for repl: {}", token)); - } Some(CommandName::Version) => { return Err(format!("unexpected argument for version: {}", token)); } @@ -271,10 +258,8 @@ impl<'a> Parser<'a> { fn parse_command(&self, token: &str) -> Option { match token { - "run" => Some(CommandName::Run), "compile" => Some(CommandName::Compile), - "asm" => Some(CommandName::Asm), - "repl" => Some(CommandName::Repl), + "explain" => Some(CommandName::Explain), "help" => Some(CommandName::Help), "version" => Some(CommandName::Version), _ => None, @@ -301,35 +286,6 @@ impl<'a> Parser<'a> { Ok(None) } - fn 
parse_vm_arg(&self, token: &str) -> Result, String> { - if token == "--dev" { - return Ok(Some((token.to_string(), false))); - } - if token.starts_with("--allow-caps=") || token.starts_with("--deny-caps=") { - return Ok(Some((token.to_string(), false))); - } - if token == "--allow-caps" || token == "--deny-caps" { - let next = self - .peek_next() - .ok_or_else(|| format!("missing value for {}", token))?; - return Ok(Some((format!("{}={}", token, next), true))); - } - if token.starts_with("-ae.") || token.starts_with("--ae-") { - return Ok(Some((token.to_string(), false))); - } - - // TODO: proper fix because this is a workaround - // powershell splits "-ae.foo=bar" into ["-ae", ".foo=bar"] because '.' - // is a property-access operator. Recombine the two tokens transparently. - if token == "-ae" - && let Some(next) = self.peek_next() - && next.starts_with('.') - { - return Ok(Some((format!("-ae{}", next), true))); - } - Ok(None) - } - fn parse_warning_flag(&self, token: &str) -> Result, String> { // -Wall, -Wno-inline, -Werror, etc if let Some(rest) = token.strip_prefix("-W") { @@ -363,10 +319,6 @@ impl<'a> Parser<'a> { matches!(token, "-v" | "--version") } - fn is_stdout(&self, token: &str) -> bool { - token == "--stdout" - } - fn parse_output_option(&mut self, token: &str) -> Result, String> { if token == "-o" || token == "--output" { let next = self diff --git a/cli/src/cli/args/usage.rs b/cli/src/cli/args/usage.rs index d2fd2c9..790c75a 100644 --- a/cli/src/cli/args/usage.rs +++ b/cli/src/cli/args/usage.rs @@ -1,37 +1,30 @@ pub fn usage() -> &'static str { "Usage: - aelys [flags] [args...] - aelys run [flags] [args...] 
- aelys compile - aelys asm - aelys repl [flags] + aelys compile [flags] + aelys explain + aelys help aelys version -Flags (any position): +Flags: -h, --help Show help -v, --version Show version -O or -O Optimization level: 0,1,2,3, none, basic, standard, aggressive - -o, --output Output path (compile/asm) - --stdout Print asm to stdout (asm) - --emit-air Print AIR instead of compiling (compile) - -ae.= VM option (e.g., -ae.max-heap=64M) - --ae-= VM option (e.g., --ae-max-heap=64M) - --allow-caps= Allow native capabilities (comma-separated) - --deny-caps= Deny native capabilities (comma-separated) - --dev Enable dev features (hot reload) + -o, --output Output path + --emit-air Print AIR instead of compiling + --emit-llvm-ir Emit LLVM IR to .ll + --explain Show detailed explanation for an error code (e.g., E0401) + --color Control color output (default: auto) + --no-color Disable color output Warning flags: -Wall Enable all warnings -Werror Treat warnings as errors - -W Enable specific category (inline, unused, deprecated, shadow) + -W Enable specific category (inline, unused, deprecated, shadow, type) -Wno- Disable specific category Examples: - aelys main.aelys -O2 --ae-trusted=true - aelys main.aelys -O2 '-ae.trusted=true' (quote in PowerShell) - aelys run -O3 main.aelys arg1 arg2 - aelys repl -ae.max-heap=1G - aelys asm main.aelys --stdout - aelys compile main.aelys -o main.avbc -Wall -Werror - aelys run program.avbc" + aelys compile main.aelys -O2 + aelys compile main.aelys -o output.exe -Wall -Werror + aelys compile main.aelys --emit-llvm-ir + aelys --explain E0401" } diff --git a/cli/src/cli/commands/asm.rs b/cli/src/cli/commands/asm.rs deleted file mode 100644 index 20c4fd8..0000000 --- a/cli/src/cli/commands/asm.rs +++ /dev/null @@ -1,181 +0,0 @@ -// disassembly for debugging bytecode - -use crate::cli::vm_config::parse_vm_args_or_error; -use aelys_backend::Compiler; -use aelys_bytecode::asm::{deserialize_with_manifest, disassemble_to_string}; -use 
aelys_driver::modules::load_modules_with_loader; -use aelys_frontend::lexer::Lexer; -use aelys_frontend::parser::Parser; -use aelys_opt::{OptimizationLevel, Optimizer}; -use aelys_runtime::{VM, VmConfig}; -use aelys_syntax::{Source, StmtKind}; -use std::path::{Path, PathBuf}; - -#[allow(dead_code)] -pub fn asm_transform(path: &Path) -> Result { - match asm_transform_with_options( - path, - None, - false, - OptimizationLevel::Standard, - VmConfig::default(), - )? { - Some(path) => Ok(path), - None => Err("no output produced".to_string()), - } -} - -pub fn run_with_options( - path: &str, - output: Option, - stdout: bool, - opt_level: OptimizationLevel, - vm_args: Vec, -) -> Result { - let parsed = parse_vm_args_or_error(&vm_args)?; - let config = parsed.config; - - let output_path = output.map(PathBuf::from); - let output = - asm_transform_with_options(Path::new(path), output_path, stdout, opt_level, config)?; - if let Some(path) = output { - eprintln!("Wrote {}", path.display()); - } - Ok(0) -} - -fn asm_transform_with_options( - path: &Path, - output: Option, - stdout: bool, - opt_level: OptimizationLevel, - config: VmConfig, -) -> Result, String> { - let ext = path - .extension() - .and_then(|s| s.to_str()) - .unwrap_or("") - .to_ascii_lowercase(); - - match ext.as_str() { - "aelys" => disassemble_source(path, output, stdout, opt_level, config), - "avbc" => disassemble_avbc(path, output, stdout), - "aasm" => Err("input is already assembly".to_string()), - _ => Err(format!( - "unsupported input extension '{}', expected .aelys or .avbc", - ext - )), - } -} - -fn disassemble_source( - path: &Path, - output: Option, - stdout: bool, - opt_level: OptimizationLevel, - config: VmConfig, -) -> Result, String> { - let content = std::fs::read_to_string(path) - .map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - let (function, heap) = compile_source(path, &content, opt_level, config)?; - let text = disassemble_to_string(&function, Some(&heap)); - 
write_output(path, output, stdout, text) -} - -fn disassemble_avbc( - path: &Path, - output: Option, - stdout: bool, -) -> Result, String> { - let bytes = - std::fs::read(path).map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - let (function, heap, _manifest, _bundles) = - deserialize_with_manifest(&bytes).map_err(|err| err.to_string())?; - let text = disassemble_to_string(&function, Some(&heap)); - write_output(path, output, stdout, text) -} - -fn write_output( - input: &Path, - output: Option, - stdout: bool, - text: String, -) -> Result, String> { - if stdout { - println!("{}", text); - return Ok(None); - } - let output = output.unwrap_or_else(|| output_path_for(input, "aasm")); - std::fs::write(&output, text) - .map_err(|err| format!("failed to write {}: {}", output.display(), err))?; - Ok(Some(output)) -} - -fn compile_source( - path: &Path, - content: &str, - opt_level: OptimizationLevel, - config: VmConfig, -) -> Result<(aelys_bytecode::Function, aelys_bytecode::Heap), String> { - let name = path.display().to_string(); - let src = Source::new(&name, content); - - let tokens = Lexer::with_source(src.clone()) - .scan() - .map_err(|err| err.to_string())?; - let stmts = Parser::new(tokens, src.clone()) - .parse() - .map_err(|err| err.to_string())?; - - let mut vm = - VM::with_config_and_args(src.clone(), config, Vec::new()).map_err(|err| err.to_string())?; - - let (imports, _loader) = load_modules_with_loader(&stmts, path, src.clone(), &mut vm) - .map_err(|err| err.to_string())?; - - let main_stmts: Vec<_> = stmts - .into_iter() - .filter(|stmt| !matches!(stmt.kind, StmtKind::Needs(_))) - .collect(); - - let mut all_known_globals = imports.known_globals.clone(); - for builtin in ["alloc", "free", "load", "store", "type"] { - all_known_globals.insert(builtin.to_string()); - } - - let typed_program = aelys_sema::TypeInference::infer_program_with_imports( - main_stmts, - src.clone(), - imports.module_aliases.clone(), - all_known_globals, - ) - 
.map_err(|errors| { - if let Some(err) = errors.first() { - err.to_string() - } else { - "Unknown type error".to_string() - } - })?; - - let mut optimizer = Optimizer::new(opt_level); - let typed_program = optimizer.optimize(typed_program); - - let (function, heap, _globals) = Compiler::with_modules( - None, - src.clone(), - imports.module_aliases, - imports.known_globals, - imports.known_native_globals, - imports.symbol_origins, - ) - .compile_typed(&typed_program) - .map_err(|err| err.to_string())?; - - Ok((function, heap)) -} - -fn output_path_for(path: &Path, extension: &str) -> PathBuf { - let mut output = path.to_path_buf(); - output.set_extension(extension); - output -} diff --git a/cli/src/cli/commands/compile.rs b/cli/src/cli/commands/compile.rs index a4c9d3b..60e63fb 100644 --- a/cli/src/cli/commands/compile.rs +++ b/cli/src/cli/commands/compile.rs @@ -1,368 +1,71 @@ -// source -> avbc compiler +// LLVM native compiler -use aelys_backend::Compiler; -use aelys_bytecode::asm::NativeBundle; -use aelys_common::{Warning, WarningConfig}; -use aelys_driver::modules::{LoadedNativeInfo, load_modules_with_loader}; -use aelys_frontend::lexer::Lexer; -use aelys_frontend::parser::Parser; -use aelys_modules::manifest::Manifest; -use aelys_opt::{OptimizationLevel, Optimizer}; -use aelys_runtime::{VM, VmConfig}; -use aelys_syntax::{Source, StmtKind}; +use aelys_common::{ColorConfig, WarningConfig, format_warnings, render_summary}; +use aelys_driver::{compile_file_with_llvm_with_warnings, lower_file_to_air}; +use aelys_opt::OptimizationLevel; use std::path::{Path, PathBuf}; -use std::sync::Arc; -const BUILTIN_NAMES: &[&str] = &["alloc", "free", "load", "store", "type"]; - -#[allow(dead_code)] -pub fn compile_to_avbc(path: &Path, opt_level: OptimizationLevel) -> Result { - compile_to_avbc_with_output(path, None, opt_level, None).map(|r| r.output_path) -} - -pub struct CompileResult { - pub output_path: PathBuf, - pub warnings: Vec, -} - -pub fn compile_to_avbc_with_output( 
- path: &Path, - output: Option, +pub fn run_with_options( + path: &str, + output: Option, opt_level: OptimizationLevel, - source_for_warnings: Option>, -) -> Result { - match detect_format(path) { - CompileInput::Assembly => { - let out = assemble_to_avbc(path, output)?; - return Ok(CompileResult { - output_path: out, - warnings: Vec::new(), - }); - } - CompileInput::Bytecode => { - return Err("input is already bytecode".to_string()); - } - CompileInput::Source => {} + _warn_config: WarningConfig, + emit_air: bool, + emit_llvm_ir: bool, + color: &ColorConfig, +) -> Result { + if emit_air { + return emit_air_program(path, opt_level); } - let content = std::fs::read_to_string(path) - .map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - - let name = path.display().to_string(); - let src = Source::new(&name, &content); - - let tokens = Lexer::with_source(src.clone()) - .scan() - .map_err(|err| err.to_string())?; - let stmts = Parser::new(tokens, src.clone()) - .parse() - .map_err(|err| err.to_string())?; - - let mut vm = VM::with_config_and_args(src.clone(), VmConfig::default(), Vec::new()) - .map_err(|err| err.to_string())?; - if let Ok(abs_path) = path.canonicalize() { - vm.set_script_path(abs_path.display().to_string()); - } else { - vm.set_script_path(path.display().to_string()); + if output.is_some() { + return Err("--output is not supported yet".to_string()); } - let (imports, loader) = load_modules_with_loader(&stmts, path, src.clone(), &mut vm) - .map_err(|err| err.to_string())?; + match compile_file_with_llvm_with_warnings(Path::new(path), opt_level, emit_llvm_ir) { + Ok(warnings) => { + let filtered: Vec<_> = warnings + .into_iter() + .filter(|warning| _warn_config.is_enabled(&warning.kind)) + .collect(); - let main_stmts: Vec<_> = stmts - .into_iter() - .filter(|stmt| !matches!(stmt.kind, StmtKind::Needs(_))) - .collect(); + if !filtered.is_empty() { + eprintln!("{}", format_warnings(&filtered)); + } + if _warn_config.treat_as_error && 
!filtered.is_empty() { + return Err(format!( + "aborting due to {} warning(s) treated as errors", + filtered.len() + )); + } - let mut all_known_globals = imports.known_globals.clone(); - for builtin in BUILTIN_NAMES { - all_known_globals.insert(builtin.to_string()); - } + if emit_llvm_ir { + let mut ir_path = PathBuf::from(path); + ir_path.set_extension("ll"); + eprintln!("Wrote {}", ir_path.display()); + } - let typed_program = aelys_sema::TypeInference::infer_program_with_imports( - main_stmts, - src.clone(), - imports.module_aliases.clone(), - all_known_globals, - ) - .map_err(|errors| { - if let Some(err) = errors.first() { - err.to_string() - } else { - "Unknown type error".to_string() + Ok(0) } - })?; - - let mut optimizer = Optimizer::new(opt_level); - let typed_program = optimizer.optimize(typed_program); - - let warnings: Vec = optimizer - .take_warnings() - .into_iter() - .map(|mut w| { - if w.source.is_none() { - w.source = source_for_warnings.clone().or_else(|| Some(src.clone())); + Err(err) => { + // render each diagnostic individually (for multi-error display) + let diagnostics = err.to_diagnostics(); + for diag in &diagnostics { + eprint!("{}", diag.render(color)); + } + let summary = render_summary(&diagnostics); + if !summary.is_empty() { + eprint!("{}", summary); } - w - }) - .collect(); - - let (mut function, heap, _globals) = Compiler::with_modules( - None, - src.clone(), - imports.module_aliases, - imports.known_globals, - imports.known_native_globals, - imports.symbol_origins, - ) - .compile_typed(&typed_program) - .map_err(|err| err.to_string())?; - - // strip debug info (function names, variable names, line info) for release builds - if opt_level != OptimizationLevel::None { - function.strip_debug_info(); - } - - let manifest_bytes = loader.manifest().map(Manifest::to_bytes); - let should_bundle = loader - .manifest() - .map(|m| m.should_bundle_natives()) - .unwrap_or(false); - - let bytes = if should_bundle && 
!loader.loaded_native_modules().is_empty() { - let bundles = build_native_bundles(loader.loaded_native_modules())?; - aelys_bytecode::asm::serialize_with_manifest( - &function, - &heap, - manifest_bytes.as_deref(), - Some(&bundles), - ) - } else if manifest_bytes.is_some() { - aelys_bytecode::asm::serialize_with_manifest( - &function, - &heap, - manifest_bytes.as_deref(), - None, - ) - } else { - aelys_bytecode::asm::serialize(&function, &heap) - }; - - let output_path = output.unwrap_or_else(|| output_path_for(path)); - std::fs::write(&output_path, bytes) - .map_err(|err| format!("failed to write {}: {}", output_path.display(), err))?; - - Ok(CompileResult { - output_path, - warnings, - }) -} - -pub fn run_with_options( - path: &str, - output: Option, - opt_level: OptimizationLevel, - warn_config: WarningConfig, -) -> Result { - let output = output.map(PathBuf::from); - let result = compile_to_avbc_with_output(Path::new(path), output, opt_level, None)?; - for w in &result.warnings { - if warn_config.is_enabled(&w.kind) { - eprintln!("{}", w); + Err(String::new()) // Signal error exit without double printing } } - - if warn_config.treat_as_error && !result.warnings.is_empty() { - let count = result.warnings.len(); - return Err(format!( - "aborting due to {} warning{}", - count, - if count == 1 { "" } else { "s" } - )); - } - - eprintln!("Wrote {}", result.output_path.display()); - Ok(0) } -pub fn emit_air(path: &str, opt_level: OptimizationLevel) -> Result { +pub fn emit_air_program(path: &str, opt_level: OptimizationLevel) -> Result { let path = Path::new(path); - let content = std::fs::read_to_string(path) - .map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - - let name = path.display().to_string(); - let src = Source::new(&name, &content); - - let tokens = Lexer::with_source(src.clone()) - .scan() - .map_err(|err| err.to_string())?; - let stmts = Parser::new(tokens, src.clone()) - .parse() - .map_err(|err| err.to_string())?; - - let mut vm 
= VM::with_config_and_args(src.clone(), VmConfig::default(), Vec::new()) - .map_err(|err| err.to_string())?; - if let Ok(abs_path) = path.canonicalize() { - vm.set_script_path(abs_path.display().to_string()); - } else { - vm.set_script_path(path.display().to_string()); - } - - let (imports, _) = load_modules_with_loader(&stmts, path, src.clone(), &mut vm) - .map_err(|err| err.to_string())?; - - let main_stmts: Vec<_> = stmts - .into_iter() - .filter(|stmt| !matches!(stmt.kind, StmtKind::Needs(_))) - .collect(); - - let mut all_known_globals = imports.known_globals.clone(); - for builtin in BUILTIN_NAMES { - all_known_globals.insert(builtin.to_string()); - } - - let typed_program = aelys_sema::TypeInference::infer_program_with_imports( - main_stmts, - src, - imports.module_aliases, - all_known_globals, - ) - .map_err(|errors| { - errors - .first() - .map(|e| e.to_string()) - .unwrap_or_else(|| "Unknown type error".to_string()) - })?; - - let mut optimizer = Optimizer::new(opt_level); - let typed_program = optimizer.optimize(typed_program); - - let mut air = aelys_air::lower::lower(&typed_program); - aelys_air::layout::compute_layouts(&mut air); - let air = aelys_air::mono::monomorphize(air); - + let air = lower_file_to_air(path, opt_level)?; print!("{}", aelys_air::print::print_program(&air)); Ok(0) } - -fn output_path_for(path: &Path) -> PathBuf { - let mut output = path.to_path_buf(); - output.set_extension("avbc"); - output -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum CompileInput { - Source, - Assembly, - Bytecode, -} - -fn detect_format(path: &Path) -> CompileInput { - let ext = path - .extension() - .and_then(|s| s.to_str()) - .unwrap_or("") - .to_ascii_lowercase(); - match ext.as_str() { - "aasm" => CompileInput::Assembly, - "avbc" => CompileInput::Bytecode, - _ => CompileInput::Source, - } -} - -fn assemble_to_avbc(path: &Path, output: Option) -> Result { - let content = std::fs::read_to_string(path) - .map_err(|err| format!("failed to read {}: 
{}", path.display(), err))?; - let (functions, heap) = - aelys_bytecode::asm::assemble(&content).map_err(|err| err.to_string())?; - if functions.is_empty() { - return Err("no functions found in assembly file".to_string()); - } - let function = reconstruct_function_hierarchy(functions); - let bytes = aelys_bytecode::asm::serialize(&function, &heap); - let output_path = output.unwrap_or_else(|| output_path_for(path)); - std::fs::write(&output_path, bytes) - .map_err(|err| format!("failed to write {}: {}", output_path.display(), err))?; - Ok(output_path) -} - -fn reconstruct_function_hierarchy( - mut functions: Vec, -) -> aelys_bytecode::Function { - if functions.len() <= 1 { - return functions - .into_iter() - .next() - .unwrap_or_else(|| aelys_bytecode::Function::new(None, 0)); - } - - let mut main_func = functions.remove(0); - main_func.nested_functions = functions; - main_func -} - -// bundle native modules into the .avbc for distribution -fn build_native_bundles( - modules: &std::collections::HashMap, -) -> Result, String> { - let mut bundles = Vec::new(); - for (name, info) in modules { - let bytes = std::fs::read(&info.file_path) - .map_err(|err| format!("failed to read {}: {}", info.file_path.display(), err))?; - let checksum = compute_simple_hash(&bytes); - let target = current_target_triple(); - bundles.push(NativeBundle { - name: name.clone(), - target, - checksum, - bytes, - }); - } - Ok(bundles) -} - -fn compute_simple_hash(data: &[u8]) -> String { - const FNV_OFFSET: u64 = 0xcbf29ce484222325; - const FNV_PRIME: u64 = 0x100000001b3; - let mut hash = FNV_OFFSET; - for &byte in data { - hash ^= u64::from(byte); - hash = hash.wrapping_mul(FNV_PRIME); - } - format!("{:016x}", hash) -} - -fn current_target_triple() -> String { - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] - { - "x86_64-unknown-linux-gnu".to_string() - } - #[cfg(all(target_os = "linux", target_arch = "aarch64"))] - { - "aarch64-unknown-linux-gnu".to_string() - } - 
#[cfg(all(target_os = "macos", target_arch = "x86_64"))] - { - "x86_64-apple-darwin".to_string() - } - #[cfg(all(target_os = "macos", target_arch = "aarch64"))] - { - "aarch64-apple-darwin".to_string() - } - #[cfg(all(target_os = "windows", target_arch = "x86_64"))] - { - "x86_64-pc-windows-msvc".to_string() - } - #[cfg(not(any( - all(target_os = "linux", target_arch = "x86_64"), - all(target_os = "linux", target_arch = "aarch64"), - all(target_os = "macos", target_arch = "x86_64"), - all(target_os = "macos", target_arch = "aarch64"), - all(target_os = "windows", target_arch = "x86_64"), - )))] - { - "unknown".to_string() - } -} diff --git a/cli/src/cli/commands/explain.rs b/cli/src/cli/commands/explain.rs new file mode 100644 index 0000000..e5bf8eb --- /dev/null +++ b/cli/src/cli/commands/explain.rs @@ -0,0 +1,14 @@ +use aelys_common::registry; + +pub fn run_explain(code: &str) { + match registry::lookup(code) { + Some(info) => { + eprintln!("{}: {}\n", info.code, info.title); + eprintln!("{}", info.explanation); + } + None => { + eprintln!("error: unknown error code: {}", code); + eprintln!("Use 'aelys --explain EXXXX' with a valid error code."); + } + } +} diff --git a/cli/src/cli/commands/repl.rs b/cli/src/cli/commands/repl.rs deleted file mode 100644 index 4c42c01..0000000 --- a/cli/src/cli/commands/repl.rs +++ /dev/null @@ -1,74 +0,0 @@ -// TODO: rustyline for history/completion would be nice - -use crate::cli::vm_config::parse_vm_args_or_error; -use aelys::{new_vm_with_config, run_with_vm_and_opt}; -use aelys_opt::OptimizationLevel; -use std::io::{self, BufRead, IsTerminal, Write}; - -#[allow(dead_code)] -pub fn run_repl_with_io( - input: R, - output: W, - opt_level: OptimizationLevel, - vm_args: Vec, -) -> Result<(), String> { - run_repl_core(input, output, opt_level, vm_args, false) -} - -pub fn run_with_options(opt_level: OptimizationLevel, vm_args: Vec) -> Result { - let stdin = io::stdin(); - let stdout = io::stdout(); - let interactive = 
stdin.is_terminal() && stdout.is_terminal(); - run_repl_core(stdin.lock(), stdout.lock(), opt_level, vm_args, interactive)?; - Ok(0) -} - -fn run_repl_core( - mut input: R, - mut output: W, - opt_level: OptimizationLevel, - vm_args: Vec, - interactive: bool, -) -> Result<(), String> { - let parsed = parse_vm_args_or_error(&vm_args)?; - let mut vm = new_vm_with_config(parsed.config, Vec::new()).map_err(|err| err.to_string())?; - - if interactive { - writeln!(output, "Aelys REPL (type 'exit' to quit)").map_err(|err| err.to_string())?; - } - - let mut line = String::new(); - loop { - if interactive { - write!(output, "aelys> ").map_err(|err| err.to_string())?; - output.flush().map_err(|err| err.to_string())?; - } - - line.clear(); - let bytes = input.read_line(&mut line).map_err(|err| err.to_string())?; - if bytes == 0 { - break; - } - - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - if trimmed == "exit" || trimmed == "quit" { - break; - } - - match run_with_vm_and_opt(&mut vm, trimmed, "", opt_level) { - Ok(value) => { - if !value.is_null() { - writeln!(output, "{}", value).map_err(|err| err.to_string())?; - } - } - Err(err) => { - writeln!(output, "{}", err).map_err(|err| err.to_string())?; - } - } - } - - Ok(()) -} diff --git a/cli/src/cli/commands/run.rs b/cli/src/cli/commands/run.rs deleted file mode 100644 index ebe9d0a..0000000 --- a/cli/src/cli/commands/run.rs +++ /dev/null @@ -1,380 +0,0 @@ -use crate::cli::vm_config::parse_vm_args_or_error; -use aelys_common::{WarningConfig, format_warnings}; -use aelys_driver::run_file_full; -use aelys_modules::manifest::Manifest; -use aelys_opt::OptimizationLevel; -use aelys_runtime::VM; -use aelys_runtime::native::NativeLoader; -use aelys_syntax::{ImportKind, NeedsStmt, Source, Span}; -use semver::{Version, VersionReq}; -use std::collections::{HashMap, HashSet}; -use std::path::Path; - -pub fn run_with_options( - path: &str, - program_args: Vec, - vm_args: Vec, - opt_level: OptimizationLevel, - 
warn_config: WarningConfig, -) -> Result { - let parsed = parse_vm_args_or_error(&vm_args)?; - let config = parsed.config; - - let path_ref = Path::new(path); - let value = match detect_format(path_ref)? { - InputFormat::Assembly => run_aasm_file(path_ref, config, program_args)?, - InputFormat::Bytecode => run_avbc_file(path_ref, config, program_args)?, - InputFormat::Source => { - ensure_utf8_source(path_ref)?; - let result = run_file_full(path_ref, config, program_args, opt_level) - .map_err(|err| err.to_string())?; - - let filtered: Vec<_> = result - .warnings - .iter() - .filter(|w| warn_config.is_enabled(&w.kind)) - .collect(); - - for w in &filtered { - eprintln!("{}", format_warnings(std::slice::from_ref(*w))); - } - - if warn_config.treat_as_error && !filtered.is_empty() { - return Err(format!("{} warning(s) treated as errors", filtered.len())); - } - - result.value - } - }; - - if !value.is_null() { - println!("{}", value); - } - Ok(0) -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum InputFormat { - Source, - Assembly, - Bytecode, -} - -fn detect_format(path: &Path) -> Result { - let ext = path - .extension() - .and_then(|s| s.to_str()) - .unwrap_or("") - .to_ascii_lowercase(); - - if ext == "aasm" { - return Ok(InputFormat::Assembly); - } - - if is_bytecode(path)? 
{ - return Ok(InputFormat::Bytecode); - } - - if ext == "avbc" { - return Err(format!("{} is not valid bytecode (VBXQ)", path.display())); - } - - Ok(InputFormat::Source) -} - -fn is_bytecode(path: &Path) -> Result { - // VBXQ magic bytes - use std::io::Read; - let mut file = std::fs::File::open(path) - .map_err(|err| format!("failed to open {}: {}", path.display(), err))?; - let mut magic = [0u8; 4]; - match file.read_exact(&mut magic) { - Ok(()) => Ok(&magic == aelys_bytecode::asm::binary::MAGIC), - Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => Ok(false), - Err(err) => Err(format!("failed to read {}: {}", path.display(), err)), - } -} - -fn run_aasm_file( - path: &Path, - config: aelys_runtime::VmConfig, - program_args: Vec, -) -> Result { - let content = std::fs::read_to_string(path) - .map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - let (functions, mut heap) = - aelys_bytecode::asm::assemble(&content).map_err(|err| err.to_string())?; - if functions.is_empty() { - return Err("no functions found in assembly file".to_string()); - } - - let mut function = reconstruct_function_hierarchy(functions); - - let src = Source::new(path.display().to_string(), ""); - let mut vm = VM::with_config_and_args(src.clone(), config, program_args) - .map_err(|err| err.to_string())?; - if let Ok(abs_path) = path.canonicalize() { - vm.set_script_path(abs_path.display().to_string()); - } else { - vm.set_script_path(path.display().to_string()); - } - - let required_modules = collect_required_modules(&function); - load_required_modules(&mut vm, path, src, &required_modules, None, &HashMap::new())?; - - let remap = vm.merge_heap(&mut heap).map_err(|err| err.to_string())?; - function.remap_constants(&remap); - - let func_ref = vm.alloc_function(function).map_err(|err| err.to_string())?; - vm.execute(func_ref).map_err(|err| err.to_string()) -} - -fn run_avbc_file( - path: &Path, - config: aelys_runtime::VmConfig, - program_args: Vec, -) -> Result { - 
let bytes = - std::fs::read(path).map_err(|err| format!("failed to read {}: {}", path.display(), err))?; - let (mut function, mut heap, manifest_bytes, bundles) = - aelys_bytecode::asm::deserialize_with_manifest(&bytes).map_err(|err| err.to_string())?; - - let manifest = match manifest_bytes.as_deref() { - Some(bytes) => Some(Manifest::from_bytes(bytes).map_err(|err| err.to_string())?), - None => None, - }; - - let bundled_modules: HashMap = - bundles.into_iter().map(|b| (b.name.clone(), b)).collect(); - - let src = Source::new(path.display().to_string(), ""); - let mut vm = VM::with_config_and_args(src.clone(), config, program_args) - .map_err(|err| err.to_string())?; - if let Ok(abs_path) = path.canonicalize() { - vm.set_script_path(abs_path.display().to_string()); - } else { - vm.set_script_path(path.display().to_string()); - } - - let required_modules = collect_required_modules(&function); - load_required_modules( - &mut vm, - path, - src, - &required_modules, - manifest.as_ref(), - &bundled_modules, - )?; - - let remap = vm.merge_heap(&mut heap).map_err(|err| err.to_string())?; - function.remap_constants(&remap); - - let func_ref = vm.alloc_function(function).map_err(|err| err.to_string())?; - vm.execute(func_ref).map_err(|err| err.to_string()) -} - -fn collect_required_modules(function: &aelys_bytecode::Function) -> HashSet { - let mut modules = HashSet::new(); - collect_required_modules_rec(function, &mut modules); - modules -} - -fn collect_required_modules_rec( - function: &aelys_bytecode::Function, - modules: &mut HashSet, -) { - for name in function.global_layout.names() { - if let Some(module_name) = name.split("::").next() - && name.contains("::") - { - modules.insert(module_name.to_string()); - } - } - for nested in &function.nested_functions { - collect_required_modules_rec(nested, modules); - } -} - -fn load_required_modules( - vm: &mut VM, - entry_path: &Path, - source: std::sync::Arc, - modules: &HashSet, - manifest: Option<&Manifest>, - 
bundled_modules: &HashMap, -) -> Result<(), String> { - let mut loader = aelys_driver::modules::ModuleLoader::with_manifest( - entry_path, - source.clone(), - manifest.cloned(), - ); - - for module_name in modules { - if let Some(bundle) = bundled_modules.get(module_name) { - load_bundled_module(vm, module_name, bundle, manifest)?; - continue; - } - - if try_load_std_module(vm, &mut loader, module_name).is_ok() { - continue; - } - - let needs = NeedsStmt { - path: vec![module_name.clone()], - kind: ImportKind::Module { alias: None }, - span: Span::dummy(), - }; - loader - .load_module(&needs, vm) - .map_err(|err| err.to_string())?; - } - - Ok(()) -} - -fn try_load_std_module( - vm: &mut VM, - loader: &mut aelys_driver::modules::ModuleLoader, - module_name: &str, -) -> Result<(), String> { - let needs = NeedsStmt { - path: vec!["std".to_string(), module_name.to_string()], - kind: ImportKind::Module { alias: None }, - span: Span::dummy(), - }; - loader - .load_module(&needs, vm) - .map(|_| ()) - .map_err(|err| err.to_string()) -} - -fn load_bundled_module( - vm: &mut VM, - module_name: &str, - bundle: &aelys_bytecode::asm::NativeBundle, - manifest: Option<&Manifest>, -) -> Result<(), String> { - if let Some(policy) = manifest.and_then(|m| m.module(module_name)) { - if !policy.capabilities.is_empty() - && let Err(denied) = vm.config().check_native_capabilities(&policy.capabilities) - { - return Err(format!( - "native capability denied for {}: {}", - module_name, denied - )); - } - - if let Some(expected) = &policy.checksum { - let actual = compute_simple_hash(&bundle.bytes); - if &actual != expected { - return Err(format!( - "native checksum mismatch for {} (expected {}, got {})", - module_name, expected, actual - )); - } - } - } - - let loader = NativeLoader::new(); - let native_module = loader - .load_embedded(&bundle.name, &bundle.bytes) - .map_err(|err| err.to_string())?; - - if let Some(policy) = manifest.and_then(|m| m.module(module_name)) - && let 
Some(required) = &policy.required_version - { - let ok = match (&native_module.version, VersionReq::parse(required)) { - (Some(found), Ok(req)) => Version::parse(found) - .map(|v| req.matches(&v)) - .unwrap_or(false), - _ => false, - }; - if !ok { - return Err(format!( - "native version mismatch for {} (required {}, found {:?})", - module_name, required, native_module.version - )); - } - } - - register_native_module(&native_module, module_name, vm)?; - vm.register_native_module(module_name.to_string(), native_module); - Ok(()) -} - -fn register_native_module( - native_module: &aelys_modules::native::NativeModule, - module_alias: &str, - vm: &mut VM, -) -> Result<(), String> { - use aelys_native::AelysExportKind; - - for (name, export) in &native_module.exports { - let qualified_name = format!("{}::{}", module_alias, name); - match export.kind { - AelysExportKind::Function => { - if export.value.is_null() { - return Err(format!("null function pointer for {}", name)); - } - let func = unsafe { - std::mem::transmute::<*const std::ffi::c_void, aelys_native::AelysNativeFn>( - export.value, - ) - }; - let func_ref = vm - .alloc_foreign(&qualified_name, export.arity, func) - .map_err(|err| err.to_string())?; - vm.set_global(qualified_name, aelys_runtime::Value::ptr(func_ref.index())); - } - AelysExportKind::Constant => { - if export.value.is_null() { - return Err(format!("null constant pointer for {}", name)); - } - let raw = unsafe { *(export.value as *const u64) }; - vm.set_global(qualified_name, aelys_runtime::Value::from_raw(raw)); - } - AelysExportKind::Type => { - vm.set_global(qualified_name, aelys_runtime::Value::null()); - } - } - } - Ok(()) -} - -// FNV-1a, good enough for integrity checks -fn compute_simple_hash(data: &[u8]) -> String { - const FNV_OFFSET: u64 = 0xcbf29ce484222325; - const FNV_PRIME: u64 = 0x100000001b3; - let mut hash = FNV_OFFSET; - for &byte in data { - hash ^= u64::from(byte); - hash = hash.wrapping_mul(FNV_PRIME); - } - format!("{:016x}", 
hash) -} - -fn ensure_utf8_source(path: &Path) -> Result<(), String> { - match std::fs::read_to_string(path) { - Ok(_) => Ok(()), - Err(err) if err.kind() == std::io::ErrorKind::InvalidData => Err(format!( - "{} is not UTF-8 text and not bytecode (VBXQ)", - path.display() - )), - Err(_) => Ok(()), - } -} - -fn reconstruct_function_hierarchy( - mut functions: Vec, -) -> aelys_bytecode::Function { - if functions.len() <= 1 { - return functions - .into_iter() - .next() - .unwrap_or_else(|| aelys_bytecode::Function::new(None, 0)); - } - - let mut main_func = functions.remove(0); - main_func.nested_functions = functions; - main_func -} diff --git a/cli/src/cli/mod.rs b/cli/src/cli/mod.rs index f07d543..a86778b 100644 --- a/cli/src/cli/mod.rs +++ b/cli/src/cli/mod.rs @@ -1,11 +1,8 @@ pub mod args; -pub mod vm_config; pub mod commands { - pub mod asm; pub mod compile; - pub mod repl; - pub mod run; + pub mod explain; } use aelys_common::WarningConfig; @@ -31,7 +28,7 @@ pub fn run() -> i32 { } dispatch(parsed).unwrap_or_else(|err| { - eprintln!("Error: {}", err); + eprintln!("{}", err); 1 }) } @@ -50,47 +47,40 @@ fn parse_warning_config(flags: &[String]) -> Result { Ok(config) } +fn color_config_from_choice(choice: &args::ColorChoice) -> aelys_common::ColorConfig { + match choice { + args::ColorChoice::Auto => aelys_common::ColorConfig::auto(), + args::ColorChoice::Always => aelys_common::ColorConfig::always(), + args::ColorChoice::Never => aelys_common::ColorConfig::never(), + } +} + fn dispatch(parsed: args::ParsedArgs) -> Result { let warn_config = parse_warning_config(&parsed.warning_flags)?; + let color = color_config_from_choice(&parsed.color); match parsed.command { args::Command::Help => Ok(0), args::Command::Version => Ok(0), - args::Command::Run { path, program_args } => commands::run::run_with_options( - &path, - program_args, - parsed.vm_args, - parsed.opt_level, - warn_config, - ), + args::Command::Explain { code } => { + commands::explain::run_explain(&code); 
+ Ok(0) + } args::Command::Compile { path, output, emit_air, - } => { - if !parsed.vm_args.is_empty() { - return Err("vm flags are only supported for run or repl".to_string()); - } - if emit_air { - commands::compile::emit_air(&path, parsed.opt_level) - } else { - commands::compile::run_with_options(&path, output, parsed.opt_level, warn_config) - } - } - - args::Command::Asm { - path, + emit_llvm_ir, + } => commands::compile::run_with_options( + &path, output, - stdout, - } => { - commands::asm::run_with_options(&path, output, stdout, parsed.opt_level, parsed.vm_args) - } - - args::Command::Repl => { - let repl_opt = aelys_opt::OptimizationLevel::Basic; - commands::repl::run_with_options(repl_opt, parsed.vm_args) - } + parsed.opt_level, + warn_config, + emit_air, + emit_llvm_ir, + &color, + ), } } diff --git a/cli/src/cli/vm_config.rs b/cli/src/cli/vm_config.rs deleted file mode 100644 index b878788..0000000 --- a/cli/src/cli/vm_config.rs +++ /dev/null @@ -1,5 +0,0 @@ -use aelys_runtime::{VmArgsParsed, parse_vm_args}; - -pub fn parse_vm_args_or_error(args: &[String]) -> Result { - parse_vm_args(args).map_err(|err| err.to_string()) -} diff --git a/cli/tests/cli_args_tests.rs b/cli/tests/cli_args_tests.rs deleted file mode 100644 index 00fcd88..0000000 --- a/cli/tests/cli_args_tests.rs +++ /dev/null @@ -1,260 +0,0 @@ -use aelys_opt::OptimizationLevel; - -use aelys_cli::cli::args::{Command, ParsedArgs, parse_args}; - -#[test] -fn parse_run_with_flags_anywhere() { - let args = vec![ - "aelys", - "-O3", - "-ae.trusted=true", - "run", - "main.aelys", - "arg1", - "-x", - ] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Run { - path: "main.aelys".to_string(), - program_args: vec!["arg1".to_string(), "-x".to_string()], - }, - vm_args: vec!["-ae.trusted=true".to_string()], - opt_level: OptimizationLevel::Aggressive, - warning_flags: Vec::new(), - } - ); -} - 
-#[test] -fn parse_implicit_run_path_first() { - let args = vec!["aelys", "main.aelys", "-O1"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Run { - path: "main.aelys".to_string(), - program_args: Vec::new(), - }, - vm_args: Vec::new(), - opt_level: OptimizationLevel::Basic, - warning_flags: Vec::new(), - } - ); -} - -#[test] -fn parse_repl_with_vm_flags() { - let args = vec!["aelys", "repl", "-ae.max-heap=1M"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Repl, - vm_args: vec!["-ae.max-heap=1M".to_string()], - opt_level: OptimizationLevel::Standard, - warning_flags: Vec::new(), - } - ); -} - -#[test] -fn parse_unknown_flag_errors_before_path() { - let args = vec!["aelys", "-Z"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let err = parse_args(&args).unwrap_err(); - assert!(err.contains("unknown flag")); -} - -#[test] -fn parse_compile_output_flag() { - let args = vec!["aelys", "compile", "-o", "out.avbc", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Compile { - path: "main.aelys".to_string(), - output: Some("out.avbc".to_string()), - emit_air: false, - }, - vm_args: Vec::new(), - opt_level: OptimizationLevel::Standard, - warning_flags: Vec::new(), - } - ); -} - -#[test] -fn parse_asm_stdout_flag() { - let args = vec!["aelys", "asm", "--stdout", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Asm { - path: "main.aelys".to_string(), - output: None, - stdout: true, - }, - vm_args: Vec::new(), - opt_level: OptimizationLevel::Standard, - warning_flags: 
Vec::new(), - } - ); -} - -#[test] -fn parse_version_command() { - let args = vec!["aelys", "--version"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!( - parsed, - ParsedArgs { - command: Command::Version, - vm_args: Vec::new(), - opt_level: OptimizationLevel::Standard, - warning_flags: Vec::new(), - } - ); -} - -#[test] -fn parse_allow_caps_with_equals() { - let args = vec!["aelys", "--allow-caps=fs,net", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert!(parsed.vm_args.contains(&"--allow-caps=fs,net".to_string())); -} - -#[test] -fn parse_allow_caps_with_space() { - let args = vec!["aelys", "--allow-caps", "fs,net", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert!(parsed.vm_args.contains(&"--allow-caps=fs,net".to_string())); -} - -#[test] -fn parse_deny_caps_with_equals() { - let args = vec!["aelys", "--deny-caps=exec", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert!(parsed.vm_args.contains(&"--deny-caps=exec".to_string())); -} - -#[test] -fn parse_deny_caps_with_space() { - let args = vec!["aelys", "--deny-caps", "exec", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert!(parsed.vm_args.contains(&"--deny-caps=exec".to_string())); -} - -#[test] -fn parse_warning_flags() { - let args = vec!["aelys", "-Wall", "-Werror", "-Wno-inline", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!(parsed.warning_flags, vec!["all", "error", "no-inline"]); -} - -#[test] -fn parse_warn_equals_syntax() { - let args = vec!["aelys", "--warn=error", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - 
let parsed = parse_args(&args).unwrap(); - - assert!(parsed.warning_flags.contains(&"error".to_string())); -} - -#[test] -fn parse_powershell_split_ae_dot() { - // PowerShell splits "-ae.trusted=true" into ["-ae", ".trusted=true"] - let args = vec!["aelys", "main.aelys", "-ae", ".trusted=true"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!(parsed.vm_args, vec!["-ae.trusted=true".to_string()]); -} - -#[test] -fn parse_powershell_split_ae_dot_before_path() { - let args = vec!["aelys", "-ae", ".trusted=true", "main.aelys"] - .into_iter() - .map(|s| s.to_string()) - .collect::>(); - - let parsed = parse_args(&args).unwrap(); - - assert_eq!(parsed.vm_args, vec!["-ae.trusted=true".to_string()]); - assert_eq!( - parsed.command, - Command::Run { - path: "main.aelys".to_string(), - program_args: Vec::new(), - } - ); -} diff --git a/cli/tests/cli_asm_tests.rs b/cli/tests/cli_asm_tests.rs deleted file mode 100644 index 140a0db..0000000 --- a/cli/tests/cli_asm_tests.rs +++ /dev/null @@ -1,15 +0,0 @@ -use aelys_cli::cli::commands::asm::asm_transform; - -#[test] -fn asm_emits_aasm_from_source() { - let dir = std::env::temp_dir().join("aelys_cli_asm_test"); - let _ = std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - let src_path = dir.join("sample.aelys"); - std::fs::write(&src_path, "let x = 1\n").unwrap(); - - let output = asm_transform(&src_path).unwrap(); - - assert!(output.exists()); - assert_eq!(output.extension().unwrap(), "aasm"); -} diff --git a/cli/tests/cli_compile_manifest_tests.rs b/cli/tests/cli_compile_manifest_tests.rs deleted file mode 100644 index a0dbbd4..0000000 --- a/cli/tests/cli_compile_manifest_tests.rs +++ /dev/null @@ -1,23 +0,0 @@ -use aelys_cli::cli::commands::compile::compile_to_avbc; -use aelys_opt::OptimizationLevel; - -#[test] -fn compile_includes_manifest_when_present() { - let dir = std::env::temp_dir().join("aelys_cli_manifest"); - let _ = 
std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - - let src_path = dir.join("main.aelys"); - std::fs::write(&src_path, "let x = 1\n").unwrap(); - - let manifest_path = dir.join("main.aelys.toml"); - std::fs::write(&manifest_path, "[build]\nbundle_native_modules = true\n").unwrap(); - - let output = compile_to_avbc(&src_path, OptimizationLevel::None).unwrap(); - let bytes = std::fs::read(output).unwrap(); - - let (_func, _heap, manifest_bytes, _bundles) = - aelys_bytecode::asm::deserialize_with_manifest(&bytes).unwrap(); - - assert!(manifest_bytes.is_some()); -} diff --git a/cli/tests/cli_compile_tests.rs b/cli/tests/cli_compile_tests.rs deleted file mode 100644 index 9e3bb0c..0000000 --- a/cli/tests/cli_compile_tests.rs +++ /dev/null @@ -1,16 +0,0 @@ -use aelys_cli::cli::commands::compile::compile_to_avbc; -use aelys_opt::OptimizationLevel; - -#[test] -fn compile_writes_avbc() { - let dir = std::env::temp_dir().join("aelys_cli_compile_test"); - let _ = std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - let src_path = dir.join("main.aelys"); - std::fs::write(&src_path, "let x = 1\n").unwrap(); - - let output = compile_to_avbc(&src_path, OptimizationLevel::None).unwrap(); - - assert!(output.exists()); - assert_eq!(output.extension().unwrap(), "avbc"); -} diff --git a/cli/tests/cli_dispatch_tests.rs b/cli/tests/cli_dispatch_tests.rs deleted file mode 100644 index 9bc4cff..0000000 --- a/cli/tests/cli_dispatch_tests.rs +++ /dev/null @@ -1,14 +0,0 @@ -use aelys_cli::cli::run_with_args; - -#[test] -fn compile_rejects_vm_args() { - let args = vec![ - "aelys".to_string(), - "compile".to_string(), - "main.aelys".to_string(), - "-ae.trusted=true".to_string(), - ]; - - let err = run_with_args(&args).unwrap_err(); - assert!(err.contains("vm flags are only supported for run or repl")); -} diff --git a/cli/tests/cli_repl_tests.rs b/cli/tests/cli_repl_tests.rs deleted file mode 100644 index 3516071..0000000 --- 
a/cli/tests/cli_repl_tests.rs +++ /dev/null @@ -1,37 +0,0 @@ -use aelys_cli::cli::commands::repl::run_repl_with_io; -use aelys_opt::OptimizationLevel; - -#[test] -fn repl_executes_input_and_exits() { - let input = "1 + 1\nexit\n"; - let mut output = Vec::new(); - - run_repl_with_io( - input.as_bytes(), - &mut output, - OptimizationLevel::None, - Vec::new(), - ) - .unwrap(); - - let text = String::from_utf8(output).unwrap(); - assert!(text.contains("2")); -} - -#[test] -fn repl_recovers_after_error() { - let input = "1 +\n1 + 1\nexit\n"; - let mut output = Vec::new(); - - run_repl_with_io( - input.as_bytes(), - &mut output, - OptimizationLevel::None, - Vec::new(), - ) - .unwrap(); - - let text = String::from_utf8(output).unwrap(); - assert!(text.to_lowercase().contains("error")); - assert!(text.contains("2")); -} diff --git a/cli/tests/cli_run_bytecode_tests.rs b/cli/tests/cli_run_bytecode_tests.rs deleted file mode 100644 index 68babbf..0000000 --- a/cli/tests/cli_run_bytecode_tests.rs +++ /dev/null @@ -1,55 +0,0 @@ -use aelys_cli::cli::commands::run::run_with_options; -use aelys_common::WarningConfig; -use aelys_opt::OptimizationLevel; - -#[test] -fn run_accepts_bytecode_with_magic() { - let dir = std::env::temp_dir().join("aelys_cli_run_bytecode"); - let _ = std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - let bytecode_path = dir.join("program.avbc"); - - let mut function = aelys_bytecode::Function::new(None, 0); - function.constants.push(aelys_bytecode::Value::int(2)); - function.num_registers = 1; - function.emit_b(aelys_bytecode::OpCode::LoadK, 0, 0, 1); - function.emit_a(aelys_bytecode::OpCode::Return, 0, 0, 0, 1); - function.finalize_bytecode(); - - let heap = aelys_bytecode::Heap::new(); - let bytes = aelys_bytecode::asm::serialize(&function, &heap); - std::fs::write(&bytecode_path, bytes).unwrap(); - - let result = run_with_options( - bytecode_path.to_str().unwrap(), - Vec::new(), - Vec::new(), - OptimizationLevel::Standard, - 
WarningConfig::new(), - ); - - assert!(result.is_ok()); -} - -#[test] -fn run_bytecode_registers_stdlib_globals() { - let dir = std::env::temp_dir().join("aelys_cli_run_stdlib"); - let _ = std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - let src_path = dir.join("hello.aelys"); - std::fs::write(&src_path, "needs std.io\nio.print(\"hi\")\n").unwrap(); - - let bytecode_path = - aelys_cli::cli::commands::compile::compile_to_avbc(&src_path, OptimizationLevel::None) - .unwrap(); - - let result = run_with_options( - bytecode_path.to_str().unwrap(), - Vec::new(), - Vec::new(), - OptimizationLevel::Standard, - WarningConfig::new(), - ); - - assert!(result.is_ok()); -} diff --git a/cli/tests/cli_run_tests.rs b/cli/tests/cli_run_tests.rs deleted file mode 100644 index e846a92..0000000 --- a/cli/tests/cli_run_tests.rs +++ /dev/null @@ -1,40 +0,0 @@ -use aelys_cli::cli::commands::run::run_with_options; -use aelys_common::WarningConfig; -use aelys_opt::OptimizationLevel; - -#[test] -fn run_rejects_invalid_vm_args() { - let err = run_with_options( - "missing.aelys", - Vec::new(), - vec!["-ae.max-heap=1".to_string()], - OptimizationLevel::None, - WarningConfig::new(), - ) - .unwrap_err(); - - assert!(err.contains("invalid value for")); -} - -#[test] -fn run_accepts_aasm_file() { - let dir = std::env::temp_dir().join("aelys_cli_run_aasm"); - let _ = std::fs::remove_dir_all(&dir); - std::fs::create_dir_all(&dir).unwrap(); - let aasm_path = dir.join("program.aasm"); - std::fs::write( - &aasm_path, - ".version 1\n.function 0\n .arity 0\n .registers 1\n .constants\n 0: int 2\n .code\n 0000: LoadK r0, 0\n 0001: Return r0\n", - ) - .unwrap(); - - let result = run_with_options( - aasm_path.to_str().unwrap(), - Vec::new(), - Vec::new(), - OptimizationLevel::Standard, - WarningConfig::new(), - ); - - assert!(result.is_ok()); -} diff --git a/cli/tests/cli_vm_config_tests.rs b/cli/tests/cli_vm_config_tests.rs deleted file mode 100644 index dcf75a6..0000000 --- 
a/cli/tests/cli_vm_config_tests.rs +++ /dev/null @@ -1,50 +0,0 @@ -use aelys_cli::cli::vm_config::parse_vm_args_or_error; - -#[test] -fn vm_args_parse_trusted_and_caps() { - let args = vec![ - "-ae.trusted=true".to_string(), - "--allow-caps=fs,net".to_string(), - ]; - - let parsed = parse_vm_args_or_error(&args).unwrap(); - - assert!(parsed.config.capabilities.allow_fs); - assert!(parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); -} - -#[test] -fn vm_args_allow_caps_enables_capabilities() { - let args = vec!["--allow-caps=fs".to_string()]; - let parsed = parse_vm_args_or_error(&args).unwrap(); - - assert!(parsed.config.capabilities.allow_fs); - assert!(!parsed.config.capabilities.allow_net); - assert!(!parsed.config.capabilities.allow_exec); - assert!(parsed.config.allowed_caps.contains("fs")); -} - -#[test] -fn vm_args_allow_multiple_caps() { - let args = vec!["--allow-caps=fs,net,exec".to_string()]; - let parsed = parse_vm_args_or_error(&args).unwrap(); - - assert!(parsed.config.capabilities.allow_fs); - assert!(parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); -} - -#[test] -fn vm_args_deny_caps_disables_capabilities() { - let args = vec![ - "--allow-caps=fs,net,exec".to_string(), - "--deny-caps=fs".to_string(), - ]; - let parsed = parse_vm_args_or_error(&args).unwrap(); - - // deny overrides allow - assert!(!parsed.config.capabilities.allow_fs); - assert!(parsed.config.capabilities.allow_net); - assert!(parsed.config.capabilities.allow_exec); -} diff --git a/cli/tests/warning_handling_tests.rs b/cli/tests/warning_handling_tests.rs new file mode 100644 index 0000000..7892321 --- /dev/null +++ b/cli/tests/warning_handling_tests.rs @@ -0,0 +1,78 @@ +use aelys_cli::cli::run_with_args; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +fn write_temp_source(prefix: &str, source: &str) -> PathBuf { + let mut path = std::env::temp_dir(); + let stamp = 
SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should be monotonic enough for test naming") + .as_nanos(); + path.push(format!("aelys_cli_{prefix}_{stamp}.aelys")); + fs::write(&path, source).expect("failed to write temp source file"); + path +} + +#[test] +fn werror_turns_warnings_into_failure() { + let path = write_temp_source( + "werror", + r#" +struct Point { x: i64 } +struct Point { y: i64 } + +fn probe() -> i64 { + return 0 +} +"#, + ); + + let args = vec![ + "aelys".to_string(), + "compile".to_string(), + path.display().to_string(), + "-Werror".to_string(), + ]; + + let result = run_with_args(&args); + let _ = fs::remove_file(&path); + let _ = fs::remove_file(path.with_extension("obj")); + let _ = fs::remove_file(path.with_extension("o")); + + let err = result.expect_err("expected -Werror to fail on warning"); + assert!(err.contains("aborting due to"), "{err}"); +} + +#[test] +fn disabling_type_warnings_allows_build() { + let path = write_temp_source( + "wdisable", + r#" +struct Point { x: i64 } +struct Point { y: i64 } + +fn probe() -> i64 { + return 0 +} +"#, + ); + + let args = vec![ + "aelys".to_string(), + "compile".to_string(), + path.display().to_string(), + "-Werror".to_string(), + "-Wno-type".to_string(), + ]; + + let result = run_with_args(&args); + let _ = fs::remove_file(&path); + let _ = fs::remove_file(path.with_extension("obj")); + let _ = fs::remove_file(path.with_extension("o")); + + assert!( + result.is_ok(), + "type warnings were disabled, got {result:?}" + ); +} diff --git a/codegen/Cargo.toml b/codegen/Cargo.toml new file mode 100644 index 0000000..f7a7cb8 --- /dev/null +++ b/codegen/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "aelys-codegen" +version.workspace = true +edition = "2024" + +[dependencies] +aelys-air = { path = "../air" } +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", default-features = false, features = ["llvm18-1", "target-x86"] } + +[dev-dependencies] +tempfile = "3" + 
+[lib] +doctest = false diff --git a/codegen/src/api/context.rs b/codegen/src/api/context.rs new file mode 100644 index 0000000..b668277 --- /dev/null +++ b/codegen/src/api/context.rs @@ -0,0 +1,133 @@ +use crate::CodegenError; +use aelys_air::AirProgram; +use inkwell::OptimizationLevel; +use inkwell::builder::Builder; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::passes::PassBuilderOptions; +use inkwell::targets::{ + CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine, +}; +use std::path::Path; + +pub struct CodegenContext { + pub(crate) context: &'static Context, + pub(crate) module: Module<'static>, + pub(crate) builder: Builder<'static>, +} + +impl CodegenContext { + pub fn new(module_name: &str) -> Self { + let context = Box::leak(Box::new(Context::create())); + let module = context.create_module(module_name); + let builder = context.create_builder(); + + // Set target triple and data layout immediately so ABI decisions + // (for eg sret, struct sizes/alignments) are correct during codegen + Target::initialize_native(&InitializationConfig::default()) + .expect("LLVM native target initialization failed"); + let triple = TargetMachine::get_default_triple(); + module.set_triple(&triple); + + if let Ok(target) = Target::from_triple(&triple) { + if let Some(machine) = target.create_target_machine( + &triple, + "generic", + "", + OptimizationLevel::None, + RelocMode::Default, + CodeModel::Default, + ) { + module.set_data_layout(&machine.get_target_data().get_data_layout()); + } + } + + Self { + context, + module, + builder, + } + } + + pub(crate) fn target_is_windows(&self) -> bool { + crate::module_targets_windows(&self.module) + } + + pub fn compile(&mut self, program: &AirProgram) -> Result<(), CodegenError> { + let _ = self.builder.get_insert_block(); + self.declare_struct_types(program)?; + self.declare_functions(program)?; + self.declare_globals(program)?; + self.define_function_bodies(program)?; + 
self.emit_entry_wrapper(program)?; + self.module + .verify() + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + + pub fn optimize(&self, pass_pipeline: &str, opt_numeric: u8) -> Result<(), CodegenError> { + let triple = TargetMachine::get_default_triple(); + let target = + Target::from_triple(&triple).map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let cpu = TargetMachine::get_host_cpu_name().to_string(); + let features = TargetMachine::get_host_cpu_features().to_string(); + let machine = target + .create_target_machine( + &triple, + &cpu, + &features, + inkwell_opt_level(opt_numeric), + // PIC so it works on Linux + RelocMode::PIC, + CodeModel::Default, + ) + .ok_or_else(|| { + CodegenError::LlvmError("failed to create target machine".to_string()) + })?; + + self.module + .run_passes(pass_pipeline, &machine, PassBuilderOptions::create()) + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + + pub fn emit_object(&self, path: &str, opt_numeric: u8) -> Result<(), CodegenError> { + Target::initialize_native(&InitializationConfig::default()) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let triple = TargetMachine::get_default_triple(); + let target = + Target::from_triple(&triple).map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let cpu = TargetMachine::get_host_cpu_name().to_string(); + let features = TargetMachine::get_host_cpu_features().to_string(); + let target_machine = target + .create_target_machine( + &triple, + &cpu, + &features, + inkwell_opt_level(opt_numeric), + RelocMode::PIC, + CodeModel::Default, + ) + .ok_or_else(|| { + CodegenError::LlvmError("failed to create target machine".to_string()) + })?; + + target_machine + .write_to_file(&self.module, FileType::Object, Path::new(path)) + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + + pub fn emit_ir(&self, path: &str) -> Result<(), CodegenError> { + self.module + .print_to_file(path) + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } +} + +fn 
inkwell_opt_level(numeric: u8) -> OptimizationLevel { + match numeric { + 0 => OptimizationLevel::None, + 1 => OptimizationLevel::Less, + 3 => OptimizationLevel::Aggressive, + _ => OptimizationLevel::Default, + } +} diff --git a/codegen/src/api/mod.rs b/codegen/src/api/mod.rs new file mode 100644 index 0000000..77515de --- /dev/null +++ b/codegen/src/api/mod.rs @@ -0,0 +1,3 @@ +mod context; + +pub use context::CodegenContext; diff --git a/codegen/src/infra/error.rs b/codegen/src/infra/error.rs new file mode 100644 index 0000000..e417158 --- /dev/null +++ b/codegen/src/infra/error.rs @@ -0,0 +1,86 @@ +use std::error::Error; +use std::fmt::{self, Display, Formatter}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AirNodePosition { + Stmt(usize), + Terminator, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AirNodeLocation { + pub function: String, + pub block: Option, + pub position: AirNodePosition, +} + +#[derive(Debug)] +pub enum LlvmBackendError { + LlvmError(String), + UnsupportedType(String), + UnsupportedInstruction(String), + InvalidNativeEntry(String), + UnsupportedAir { + kind: &'static str, + detail: String, + location: Option, + }, +} + +impl LlvmBackendError { + pub fn unsupported(kind: &'static str, detail: impl Into) -> Self { + Self::UnsupportedAir { + kind, + detail: detail.into(), + location: None, + } + } + + pub fn unsupported_with_location( + kind: &'static str, + detail: impl Into, + location: AirNodeLocation, + ) -> Self { + Self::UnsupportedAir { + kind, + detail: detail.into(), + location: Some(location), + } + } +} + +impl Display for LlvmBackendError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::LlvmError(message) => write!(f, "llvm error: {message}"), + Self::UnsupportedType(message) => write!(f, "unsupported type: {message}"), + Self::UnsupportedInstruction(message) => { + write!(f, "unsupported instruction: {message}") + } + Self::InvalidNativeEntry(message) => write!(f, "invalid 
native entry: {message}"), + Self::UnsupportedAir { + kind, + detail, + location, + } => { + write!(f, "unsupported AIR: {kind}")?; + if !detail.is_empty() { + write!(f, " (reason: {detail})")?; + } + if let Some(location) = location { + write!(f, " at fn `{}`", location.function)?; + if let Some(block) = location.block { + write!(f, ", bb{block}")?; + } + match location.position { + AirNodePosition::Stmt(index) => write!(f, ", stmt #{index}")?, + AirNodePosition::Terminator => write!(f, ", terminator")?, + } + } + Ok(()) + } + } + } +} + +impl Error for LlvmBackendError {} diff --git a/codegen/src/infra/mod.rs b/codegen/src/infra/mod.rs new file mode 100644 index 0000000..1ad0496 --- /dev/null +++ b/codegen/src/infra/mod.rs @@ -0,0 +1,3 @@ +pub(crate) mod error; +pub(crate) mod naming; +pub(crate) mod types; diff --git a/codegen/src/infra/naming.rs b/codegen/src/infra/naming.rs new file mode 100644 index 0000000..a159355 --- /dev/null +++ b/codegen/src/infra/naming.rs @@ -0,0 +1,7 @@ +pub(crate) fn is_reserved_bootstrap_builtin(name: &str) -> bool { + matches!(name, "print" | "println") +} + +pub(crate) fn reserved_bootstrap_builtin_message(name: &str) -> String { + format!("reserved builtin during bootstrap: {}", name) +} diff --git a/codegen/src/infra/types.rs b/codegen/src/infra/types.rs new file mode 100644 index 0000000..1ff7673 --- /dev/null +++ b/codegen/src/infra/types.rs @@ -0,0 +1,202 @@ +use crate::CodegenError; +use aelys_air::AirType; +use inkwell::AddressSpace; +use inkwell::types::{ + AnyTypeEnum, BasicMetadataTypeEnum, BasicType, BasicTypeEnum, PointerType, StructType, +}; + +const AELYS_STRING_STRUCT_NAME: &str = "__aelys_string"; + +pub fn air_type_to_llvm<'ctx>( + ty: &AirType, + context: &'ctx inkwell::context::Context, +) -> Result, CodegenError> { + match ty { + AirType::I8 | AirType::U8 => Ok(context.i8_type().into()), + AirType::I16 | AirType::U16 => Ok(context.i16_type().into()), + AirType::I32 | AirType::U32 => 
Ok(context.i32_type().into()), + AirType::I64 | AirType::U64 => Ok(context.i64_type().into()), + AirType::F32 => Ok(context.f32_type().into()), + AirType::F64 => Ok(context.f64_type().into()), + AirType::Bool => Ok(context.bool_type().into()), + AirType::Str => Ok(aelys_string_type(context).into()), + AirType::Ptr(inner) => Ok(pointer_to_air_type(inner, context)?.into()), + AirType::Enum(name) => { + // Data enums have a named struct type registered; simple enums use i32. + let enum_struct_name = format!("__aelys_enum_{}", name); + if let Some(st) = context.get_struct_type(&enum_struct_name) { + Ok(st.into()) + } else { + Ok(context.i32_type().into()) + } + } + AirType::Struct(name) => context + .get_struct_type(name) + .map(Into::into) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown struct type: {}", name))), + AirType::Array(inner, len) => { + let inner_ty = air_basic_type_to_llvm(inner, context)?; + let len_u32 = u32::try_from(*len).map_err(|_| { + CodegenError::UnsupportedType(format!("array length too large for LLVM: {}", len)) + })?; + Ok(inner_ty.array_type(len_u32).into()) + } + AirType::Slice(inner) => { + let ptr_ty = pointer_to_air_type(inner, context); + Ok(context + .struct_type(&[ptr_ty?.into(), context.i64_type().into()], false) + .into()) + } + AirType::FnPtr { + params, + ret, + conv, + } => { + if matches!(conv, aelys_air::CallingConv::Aelys) { + // Aelys function values are fat pointers { fn_ptr, env_ptr }, + // regardless of whether they capture anything. + // + // This makes the representation uniform at call sites. 
+ // See the comment block in codegen/src/lowering/functions.rs if you wanna see everything + Ok(closure_fat_ptr_type(context).into()) + } else { + // C/Rust convention: bare function pointer + let mut llvm_params: Vec> = + Vec::with_capacity(params.len()); + for param in params { + llvm_params.push(air_basic_type_to_llvm(param, context)?.into()); + } + let fn_ty = match ret.as_ref() { + AirType::Void => context.void_type().fn_type(&llvm_params, false), + _ => air_basic_type_to_llvm(ret, context)?.fn_type(&llvm_params, false), + }; + #[allow(deprecated)] + { + Ok(fn_ty.ptr_type(AddressSpace::default()).into()) + } + } + } + AirType::Param(param) => Err(CodegenError::UnsupportedType(format!( + "unresolved AIR type parameter: {:?}", + param + ))), + AirType::Opaque => Err(CodegenError::UnsupportedType( + "unresolved Dynamic type reached codegen (should have been resolved by monomorphization or rejected by validation)".to_string(), + )), + AirType::Void => Ok(context.void_type().into()), + } +} + +pub fn air_basic_type_to_llvm<'ctx>( + ty: &AirType, + context: &'ctx inkwell::context::Context, +) -> Result, CodegenError> { + match air_type_to_llvm(ty, context)? 
{ + AnyTypeEnum::ArrayType(t) => Ok(t.into()), + AnyTypeEnum::FloatType(t) => Ok(t.into()), + AnyTypeEnum::IntType(t) => Ok(t.into()), + AnyTypeEnum::PointerType(t) => Ok(t.into()), + AnyTypeEnum::StructType(t) => Ok(t.into()), + AnyTypeEnum::VectorType(t) => Ok(t.into()), + AnyTypeEnum::ScalableVectorType(t) => Ok(t.into()), + AnyTypeEnum::FunctionType(_) => Err(CodegenError::UnsupportedType( + "function type is not a basic LLVM type".to_string(), + )), + AnyTypeEnum::VoidType(_) => Err(CodegenError::UnsupportedType( + "void type is not a basic LLVM type".to_string(), + )), + } +} + +pub fn alignment_of(ty: BasicTypeEnum<'_>) -> u32 { + match ty { + BasicTypeEnum::IntType(int_ty) => int_alignment(int_ty.get_bit_width()), + BasicTypeEnum::FloatType(float_ty) => float_alignment(float_ty.get_bit_width()), + BasicTypeEnum::PointerType(_) => 8, + BasicTypeEnum::ArrayType(array_ty) => alignment_of(array_ty.get_element_type()), + BasicTypeEnum::StructType(struct_ty) => struct_alignment(struct_ty), + BasicTypeEnum::VectorType(vector_ty) => alignment_of(vector_ty.get_element_type()), + BasicTypeEnum::ScalableVectorType(vector_ty) => alignment_of(vector_ty.get_element_type()), + } +} + +fn int_alignment(bit_width: u32) -> u32 { + match bit_width { + 0..=8 => 1, + 9..=16 => 2, + 17..=32 => 4, + _ => 8, + } +} + +fn float_alignment(bit_width: u32) -> u32 { + match bit_width { + 0..=16 => 2, + 17..=32 => 4, + _ => 8, + } +} + +fn struct_alignment(ty: inkwell::types::StructType<'_>) -> u32 { + if ty.is_opaque() { + return 1; + } + if ty.is_packed() { + return 1; + } + ty.get_field_types() + .into_iter() + .map(alignment_of) + .max() + .unwrap_or(1) +} + +/// Fat pointer type for Aelys closures: `{ ptr fn_ptr, ptr env_ptr }`. 
+pub fn closure_fat_ptr_type( + context: &'_ inkwell::context::Context, +) -> StructType<'_> { + let ptr_ty = context.ptr_type(AddressSpace::default()); + context.struct_type(&[ptr_ty.into(), ptr_ty.into()], false) +} + +fn pointer_to_i8<'ctx>(context: &'ctx inkwell::context::Context) -> PointerType<'ctx> { + #[allow(deprecated)] + { + context.i8_type().ptr_type(AddressSpace::default()) + } +} + +pub(crate) fn aelys_string_type<'ctx>( + context: &'ctx inkwell::context::Context, +) -> StructType<'ctx> { + if let Some(existing) = context.get_struct_type(AELYS_STRING_STRUCT_NAME) { + if existing.is_opaque() { + existing.set_body( + &[pointer_to_i8(context).into(), context.i64_type().into()], + false, + ); + } + return existing; + } + + let ty = context.opaque_struct_type(AELYS_STRING_STRUCT_NAME); + ty.set_body( + &[pointer_to_i8(context).into(), context.i64_type().into()], + false, + ); + ty +} + +fn pointer_to_air_type<'ctx>( + ty: &AirType, + context: &'ctx inkwell::context::Context, +) -> Result, CodegenError> { + if matches!(ty, AirType::Void) { + return Ok(context.ptr_type(AddressSpace::default())); + } + + #[allow(deprecated)] + { + Ok(air_basic_type_to_llvm(ty, context)?.ptr_type(AddressSpace::default())) + } +} diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs new file mode 100644 index 0000000..60db48a --- /dev/null +++ b/codegen/src/lib.rs @@ -0,0 +1,20 @@ +mod api; +mod infra; +mod lowering; + +pub mod types; + +pub use api::CodegenContext; +pub use infra::error::{AirNodeLocation, AirNodePosition, LlvmBackendError}; + +pub type CodegenError = LlvmBackendError; + +pub(crate) use infra::naming::{is_reserved_bootstrap_builtin, reserved_bootstrap_builtin_message}; + +pub(crate) fn module_targets_windows(module: &inkwell::module::Module) -> bool { + module + .get_triple() + .as_str() + .to_str() + .map_or(false, |t| t.contains("windows")) +} diff --git a/codegen/src/lowering/body.rs b/codegen/src/lowering/body.rs new file mode 100644 index 0000000..0fbf9c4 
--- /dev/null +++ b/codegen/src/lowering/body.rs @@ -0,0 +1,393 @@ +use crate::lowering::functions::{function_has_implicit_env, needs_sret}; +use crate::types::air_basic_type_to_llvm; +use crate::{AirNodeLocation, AirNodePosition, CodegenError}; +use aelys_air::{ + AirFunction, AirProgram, AirStmtKind, AirType, BlockId, FunctionId, LocalId, Operand, Place, + Rvalue, +}; +use inkwell::basic_block::BasicBlock; +use inkwell::builder::Builder; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::values::{BasicValueEnum, FunctionValue, PointerValue}; +use std::collections::{HashMap, HashSet}; + +pub(crate) struct FunctionCodegen<'a> { + pub(crate) context: &'static Context, + pub(crate) module: &'a Module<'static>, + pub(crate) builder: Builder<'static>, + pub(crate) function: FunctionValue<'static>, + pub(crate) air_function: &'a AirFunction, + pub(crate) program: &'a AirProgram, + pub(crate) function_names: &'a HashMap, + pub(crate) block_map: HashMap>, + pub(crate) alloca_locals: HashSet, + pub(crate) alloca_map: HashMap>, + pub(crate) value_map: HashMap>, + pub(crate) local_types: HashMap, + pub(crate) string_id: u64, + pub(crate) string_globals: HashMap>, + pub(crate) current_block: Option, + pub(crate) current_stmt_index: Option, + // cached so we don't recompute it 3 times + entry_block_id: BlockId, + // sret pointer (LLVM param 0) for C-convention functions returning structs + // on Windows. Terminators store to this instead of returning directly. 
+ pub(crate) sret_ptr: Option>, +} + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn target_is_windows(&self) -> bool { + crate::module_targets_windows(self.module) + } + + pub(crate) fn new( + context: &'static Context, + module: &'a Module<'static>, + function: FunctionValue<'static>, + air_function: &'a AirFunction, + program: &'a AirProgram, + function_names: &'a HashMap, + ) -> Self { + let mut local_types = HashMap::new(); + let mut alloca_locals = HashSet::new(); + let param_ids: HashSet = air_function.params.iter().map(|p| p.id).collect(); + for param in &air_function.params { + local_types.insert(param.id, param.ty.clone()); + } + for local in &air_function.locals { + local_types.insert(local.id, local.ty.clone()); + if !param_ids.contains(&local.id) && (local.name.is_some() || local.is_mut) { + alloca_locals.insert(local.id); + } + } + // locals assigned in 2+ blocks need alloca, value_map can't express + // phi nodes, so multi-block assignments (if-expressions, short-circuit + // AND/OR) would silently read stale values from codegen order instead + // of control flow. alloca + mem2reg fixes this though + let mut first_assign_block: HashMap = HashMap::new(); + for block in &air_function.blocks { + for stmt in &block.stmts { + if let AirStmtKind::Assign { place, rvalue } = &stmt.kind { + if let Place::Local(local) = place { + // A param already has an SSA value from copy_params (the + // function entry), so any subsequent assignment in a block + // creates a second "definition site" — force alloca so + // dominance requirements are met across basic blocks. 
+ if param_ids.contains(local) { + alloca_locals.insert(*local); + } else { + match first_assign_block.entry(*local) { + std::collections::hash_map::Entry::Vacant(e) => { + e.insert(block.id); + } + std::collections::hash_map::Entry::Occupied(e) => { + if *e.get() != block.id { + alloca_locals.insert(*local); + } + } + } + } + } + if let Place::Field(local, _) = place { + alloca_locals.insert(*local); + } + if let Place::Index(local, _) = place { + alloca_locals.insert(*local); + } + if let Rvalue::AddressOf(local) = rvalue { + alloca_locals.insert(*local); + } + if let Rvalue::Index { base, .. } = rvalue { + if let Operand::Copy(local) | Operand::Move(local) = base { + if matches!(local_types.get(local), Some(AirType::Array(_, _))) { + alloca_locals.insert(*local); + } + } + } + } + } + } + + let entry_block_id = find_entry_block(air_function); + + let is_windows = crate::module_targets_windows(module); + let sret_ptr = if needs_sret( + &air_function.ret_ty, + air_function.calling_conv, + is_windows, + program, + ) { + Some( + function + .get_nth_param(0) + .expect("sret function must have param 0") + .into_pointer_value(), + ) + } else { + None + }; + + Self { + context, + module, + builder: context.create_builder(), + function, + air_function, + program, + function_names, + block_map: HashMap::new(), + alloca_locals, + alloca_map: HashMap::new(), + value_map: HashMap::new(), + local_types, + string_id: 0, + string_globals: HashMap::new(), + current_block: None, + current_stmt_index: None, + entry_block_id, + sret_ptr, + } + } + + pub(crate) fn generate(&mut self) -> Result<(), CodegenError> { + self.create_blocks(); + self.create_allocas()?; + self.copy_params()?; + self.generate_blocks() + } + + /// Returns block IDs in codegen order: entry first, then the rest. 
+ fn ordered_block_ids(&self) -> Vec { + let mut ids = Vec::with_capacity(self.air_function.blocks.len()); + ids.push(self.entry_block_id); + for block in &self.air_function.blocks { + if block.id != self.entry_block_id { + ids.push(block.id); + } + } + ids + } + + fn create_blocks(&mut self) { + for id in self.ordered_block_ids() { + let name = format!("bb{}", id.0); + let llvm_block = self.context.append_basic_block(self.function, &name); + self.block_map.insert(id, llvm_block); + } + } + + fn create_allocas(&mut self) -> Result<(), CodegenError> { + let entry = self.entry_block()?; + self.builder.position_at_end(entry); + + let params = self.air_function.params.clone(); + for param in params { + if self.local_uses_alloca(param.id) { + self.ensure_local_alloca(param.id, ¶m.ty)?; + } + } + + let locals = self.air_function.locals.clone(); + for local in locals { + if self.local_uses_alloca(local.id) { + self.ensure_local_alloca(local.id, &local.ty)?; + } + } + + Ok(()) + } + + fn copy_params(&mut self) -> Result<(), CodegenError> { + let params = self.air_function.params.clone(); + // sret pointer occupies LLVM param 0, implicit env occupies the next slot + let mut offset = if self.sret_ptr.is_some() { 1u32 } else { 0 }; + if function_has_implicit_env(self.air_function) { + offset += 1; // skip implicit env param + } + for (index, param) in params.iter().enumerate() { + let value = self + .function + .get_nth_param(index as u32 + offset) + .ok_or_else(|| { + CodegenError::LlvmError(format!( + "missing LLVM param {} in `{}`", + index, self.air_function.name + )) + })?; + if self.local_uses_alloca(param.id) { + let ptr = self.lookup_local_ptr(param.id)?; + self.store_value(ptr, value.into())?; + } else { + self.value_map.insert(param.id, value); + } + } + + Ok(()) + } + + fn generate_blocks(&mut self) -> Result<(), CodegenError> { + let blocks = self.air_function.blocks.clone(); + let order = self.ordered_block_ids(); + + for block_id in order { + let block = 
blocks.iter().find(|b| b.id == block_id).unwrap(); + self.current_block = Some(block.id); + let llvm_block = self.lookup_block(block.id)?; + self.builder.position_at_end(llvm_block); + for (stmt_index, stmt) in block.stmts.iter().enumerate() { + self.current_stmt_index = Some(stmt_index); + self.generate_stmt(&stmt.kind)?; + } + self.current_stmt_index = None; + self.generate_terminator(&block.terminator)?; + } + + self.current_block = None; + Ok(()) + } + + pub(crate) fn ensure_local_alloca( + &mut self, + local: LocalId, + ty: &AirType, + ) -> Result, CodegenError> { + if let Some(ptr) = self.alloca_map.get(&local).copied() { + return Ok(ptr); + } + if !self.local_uses_alloca(local) { + return Err(CodegenError::LlvmError(format!( + "local {} has no stack storage", + local.0 + ))); + } + + let alloca_ty = air_basic_type_to_llvm(ty, self.context)?; + let ptr = self + .builder + .build_alloca(alloca_ty, &format!("l{}", local.0)) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(ptr, alloca_ty)?; + + self.alloca_map.insert(local, ptr); + Ok(ptr) + } + + fn entry_block(&self) -> Result, CodegenError> { + self.lookup_block(self.entry_block_id) + } + + pub(crate) fn lookup_block(&self, id: BlockId) -> Result, CodegenError> { + self.block_map + .get(&id) + .copied() + .ok_or_else(|| CodegenError::LlvmError(format!("unknown block {}", id.0))) + } + + pub(crate) fn lookup_local_ptr( + &self, + local: LocalId, + ) -> Result, CodegenError> { + self.alloca_map + .get(&local) + .copied() + .ok_or_else(|| CodegenError::LlvmError(format!("unknown local {}", local.0))) + } + + pub(crate) fn local_uses_alloca(&self, local: LocalId) -> bool { + self.alloca_locals.contains(&local) + } + + pub(crate) fn assign_local( + &mut self, + local: LocalId, + value: BasicValueEnum<'static>, + ) -> Result<(), CodegenError> { + if let Some(ptr) = self.alloca_map.get(&local).copied() { + return self.store_value(ptr, value); + } + if self.local_uses_alloca(local) { + 
return Err(CodegenError::LlvmError(format!( + "missing stack slot for local {}", + local.0 + ))); + } + self.value_map.insert(local, value); + Ok(()) + } + + pub(crate) fn local_air_type(&self, local: LocalId) -> Result<&AirType, CodegenError> { + self.local_types + .get(&local) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown local {}", local.0))) + } + + pub(crate) fn load_local( + &mut self, + local: LocalId, + ) -> Result, CodegenError> { + if let Some(ptr) = self.alloca_map.get(&local).copied() { + let ty = air_basic_type_to_llvm(self.local_air_type(local)?, self.context)?; + return self.load_value(ty, ptr, &format!("ld{}", local.0)); + } + self.value_map.get(&local).copied().ok_or_else(|| { + CodegenError::LlvmError(format!("local {} used before assignment", local.0)) + }) + } + + pub(crate) fn unsupported_air( + &self, + kind: &'static str, + detail: impl Into, + ) -> CodegenError { + let position = match self.current_stmt_index { + Some(index) => AirNodePosition::Stmt(index), + None => AirNodePosition::Terminator, + }; + let location = AirNodeLocation { + function: self.air_function.name.clone(), + block: self.current_block.map(|block| block.0), + position, + }; + CodegenError::unsupported_with_location(kind, detail, location) + } +} + +/// Find the entry block (no predecessors). Falls back to first block. +fn find_entry_block(func: &AirFunction) -> BlockId { + use aelys_air::AirTerminator; + + let mut has_predecessors = HashSet::new(); + for block in &func.blocks { + match &block.terminator { + AirTerminator::Goto(target) => { + has_predecessors.insert(*target); + } + AirTerminator::Branch { + then_block, + else_block, + .. + } => { + has_predecessors.insert(*then_block); + has_predecessors.insert(*else_block); + } + AirTerminator::Switch { + targets, default, .. 
+ } => { + for (_, target) in targets { + has_predecessors.insert(*target); + } + has_predecessors.insert(*default); + } + _ => {} + } + } + + for block in &func.blocks { + if !has_predecessors.contains(&block.id) { + return block.id; + } + } + + func.blocks.first().map(|b| b.id).unwrap_or(BlockId(0)) +} diff --git a/codegen/src/lowering/calls.rs b/codegen/src/lowering/calls.rs new file mode 100644 index 0000000..933f188 --- /dev/null +++ b/codegen/src/lowering/calls.rs @@ -0,0 +1,580 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::lowering::functions::{function_has_implicit_env, llvm_calling_convention, needs_sret}; +use crate::lowering::globals::{GLOBAL_GET_PREFIX, GLOBAL_SET_PREFIX}; +use crate::types::{aelys_string_type, air_basic_type_to_llvm}; +use crate::{is_reserved_bootstrap_builtin, reserved_bootstrap_builtin_message}; +use aelys_air::{AirConst, AirType, Callee, LocalId, Operand, layout::enum_has_data}; +use inkwell::types::{BasicMetadataTypeEnum, BasicType, FunctionType}; +use inkwell::values::{BasicMetadataValueEnum, BasicValueEnum, FunctionValue}; + +// Call generation for Aelys closures and function values +// +// Indirect calls (Callee::FnPtr) split into two paths based on calling +// convention: +// +// - Aelys convention: the local holds a fat pointer { fn_ptr, env_ptr } +// +// We extract both fields, prepend env_ptr to the argument list, and call +// fn_ptr indirectly. This works identically for capturing closures (env points to a heap struct), non-capturing lambdas (env is null), +// and also named functions used as values (env is null). +// +// The callee always expects env as its first parameter. +// +// - C convention: the local holds a bare function pointer, standard indirect call +// +// Direct calls (Callee::Direct, Callee::Named) to Aelys functions prepend a +// null env pointer. +// +// callee_has_implicit_env checks whether the target function +// was declared with the implicit env +// +// (i.e. 
it's a non-extern, non-closure Aelys function). Extern, builtin, and ad-hoc functions don't get env + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_call( + &mut self, + callee: &Callee, + args: &[Operand], + expected_ret: Option<&AirType>, + ) -> Result>, CodegenError> { + let mut arg_values = Vec::with_capacity(args.len()); + for arg in args { + arg_values.push(self.generate_operand(arg)?); + } + + // AIR still models globals as synthetic get/set calls until it grows + // first-class global operands, so lower them directly here. + if let Callee::Named(name) = callee { + if let Some(global_name) = name.strip_prefix(GLOBAL_GET_PREFIX) { + return self.generate_global_get(global_name, args); + } + if let Some(global_name) = name.strip_prefix(GLOBAL_SET_PREFIX) { + return self.generate_global_set(global_name, args); + } + + // print/println are reserved bootstrap names lowered to __aelys_write + if is_reserved_bootstrap_builtin(name) { + debug_assert!( + self.module.get_function(name).is_none(), + "reserved builtin must be rejected during declaration phase" + ); + if self.module.get_function(name).is_some() { + return Err(CodegenError::UnsupportedInstruction( + reserved_bootstrap_builtin_message(name), + )); + } + return self.generate_bootstrap_print_call( + name == "println", + args, + &arg_values, + expected_ret, + ); + } + } + + let metadata_args: Vec> = + arg_values.iter().copied().map(Into::into).collect(); + let arg_types: Vec> = + arg_values.iter().map(|v| v.get_type().into()).collect(); + + match callee { + Callee::FnPtr(local) => { + let (fn_ty, call_conv, sret_ret) = self.fn_ptr_signature_for_local(*local)?; + let is_aelys_fnptr = self.is_aelys_convention_fnptr(*local); + + if is_aelys_fnptr { + // Aelys-convention fat pointer: { fn_ptr, env_ptr } + let fat_ptr = self.load_local(*local)?.into_struct_value(); + let fn_ptr = self + .builder + .build_extract_value(fat_ptr, 0, "closure_fn") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? 
+ .into_pointer_value(); + let env_ptr = self + .builder + .build_extract_value(fat_ptr, 1, "closure_env") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + // Prepend env_ptr to args + let mut all_args: Vec> = + vec![env_ptr.into()]; + all_args.extend(metadata_args.iter().copied()); + let call = self + .builder + .build_indirect_call(fn_ty, fn_ptr, &all_args, "call_closure") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(call_conv); + Ok(call.try_as_basic_value().basic()) + } else { + // C-convention bare function pointer + let fn_ptr = self.load_local(*local)?.into_pointer_value(); + if let Some(ret_air_ty) = sret_ret { + let ret_ty = air_basic_type_to_llvm(&ret_air_ty, self.context)?; + let result_ptr = self + .builder + .build_alloca(ret_ty, "sret_slot") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(result_ptr, ret_ty)?; + let mut all_args: Vec> = + vec![result_ptr.into()]; + all_args.extend(metadata_args.iter().copied()); + let call = self + .builder + .build_indirect_call(fn_ty, fn_ptr, &all_args, "call_indirect") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(call_conv); + self.add_sret_callsite_attr(call, ret_ty); + Ok(Some( + self.builder + .build_load(ret_ty, result_ptr, "call_indirect_sret") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?, + )) + } else { + let call = self + .builder + .build_indirect_call(fn_ty, fn_ptr, &metadata_args, "call_indirect") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(call_conv); + Ok(call.try_as_basic_value().basic()) + } + } + } + _ => { + let fn_value = self.resolve_callee(callee, &arg_types, expected_ret)?; + // Non-extern Aelys functions get an implicit env param; prepend null. 
+ let needs_env = self.callee_has_implicit_env(callee); + let final_args = if needs_env { + let null_env = self + .context + .ptr_type(inkwell::AddressSpace::default()) + .const_null(); + let mut all = Vec::with_capacity(metadata_args.len() + 1); + all.push(null_env.into()); + all.extend(metadata_args.iter().copied()); + all + } else { + metadata_args + }; + + if let Some(ret_air_ty) = expected_ret + && self.callee_needs_sret(callee) + { + let ret_ty = air_basic_type_to_llvm(ret_air_ty, self.context)?; + Ok(Some(self.call_with_sret( + fn_value, + &final_args, + ret_ty, + "call_sret", + )?)) + } else { + let call = self + .builder + .build_call(fn_value, &final_args, "call_direct") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(fn_value.get_call_conventions()); + Ok(call.try_as_basic_value().basic()) + } + } + } + } + + fn resolve_callee( + &mut self, + callee: &Callee, + arg_types: &[BasicMetadataTypeEnum<'static>], + expected_ret: Option<&AirType>, + ) -> Result, CodegenError> { + match callee { + Callee::Direct(id) => { + let name = self.function_names.get(id).ok_or_else(|| { + CodegenError::UnsupportedInstruction(format!("unknown function id {}", id.0)) + })?; + self.module + .get_function(name) + .ok_or_else(|| CodegenError::LlvmError(format!("missing function {}", name))) + } + Callee::Named(name) => { + if let Some(function) = self.module.get_function(name) { + return Ok(function); + } + let fn_ty = self.ad_hoc_function_type(arg_types, expected_ret)?; + Ok(self.module.add_function(name, fn_ty, None)) + } + Callee::Extern(name, conv) => { + let function = if let Some(function) = self.module.get_function(name) { + function + } else { + let fn_ty = self.ad_hoc_function_type(arg_types, expected_ret)?; + self.module.add_function(name, fn_ty, None) + }; + function.set_call_conventions(llvm_calling_convention(*conv)); + Ok(function) + } + Callee::FnPtr(_) => Err(CodegenError::UnsupportedInstruction( + "callee::FnPtr should be 
handled by generate_call".to_string(), + )), + } + } + + fn ad_hoc_function_type( + &self, + arg_types: &[BasicMetadataTypeEnum<'static>], + expected_ret: Option<&AirType>, + ) -> Result, CodegenError> { + match expected_ret { + None | Some(AirType::Void) => Ok(self.context.void_type().fn_type(arg_types, false)), + Some(ret) => Ok(air_basic_type_to_llvm(ret, self.context)?.fn_type(arg_types, false)), + } + } + + fn generate_bootstrap_print_call( + &mut self, + newline: bool, + args: &[Operand], + arg_values: &[BasicValueEnum<'static>], + expected_ret: Option<&AirType>, + ) -> Result>, CodegenError> { + if args.len() != 1 || arg_values.len() != 1 { + return Err(CodegenError::UnsupportedInstruction( + "print/println expects exactly one argument".to_string(), + )); + } + + let arg_type = self.operand_type(&args[0])?; + let value = arg_values[0]; + + let string_value = match arg_type { + AirType::I64 | AirType::I32 | AirType::I16 | AirType::I8 => { + let int_val = value.into_int_value(); + let i64_val = if int_val.get_type() == self.context.i64_type() { + int_val + } else { + self.builder + .build_int_s_extend(int_val, self.context.i64_type(), "ext_i64") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + }; + let fn_val = self.ensure_to_string_i64_function(); + self.call_sret_returning_fn(fn_val, &[i64_val.into()], "to_str")? + } + AirType::U8 | AirType::U16 | AirType::U32 | AirType::U64 => { + // unsigned: zero-extend to i64 before calling to_string_i64 + let int_val = value.into_int_value(); + let i64_val = if int_val.get_type() == self.context.i64_type() { + int_val + } else { + self.builder + .build_int_z_extend(int_val, self.context.i64_type(), "zext_i64") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + }; + let fn_val = self.ensure_to_string_i64_function(); + self.call_sret_returning_fn(fn_val, &[i64_val.into()], "to_str")? 
+ } + AirType::F64 | AirType::F32 => { + let float_val = value.into_float_value(); + let f64_val = if float_val.get_type() == self.context.f64_type() { + float_val + } else { + self.builder + .build_float_ext(float_val, self.context.f64_type(), "ext_f64") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + }; + let fn_val = self.ensure_to_string_f64_function(); + self.call_sret_returning_fn(fn_val, &[f64_val.into()], "to_str")? + } + AirType::Bool => { + let bool_val = value.into_int_value(); + let i64_val = self + .builder + .build_int_z_extend(bool_val, self.context.i64_type(), "bool_to_i64") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let fn_val = self.ensure_to_string_bool_function(); + self.call_sret_returning_fn(fn_val, &[i64_val.into()], "to_str")? + } + AirType::Str => match &args[0] { + Operand::Const(AirConst::Str(text)) => self.global_string_value(text)?, + _ if value.is_struct_value() => { + let struct_val = value.into_struct_value(); + if struct_val.get_type() != aelys_string_type(self.context) { + return Err(CodegenError::UnsupportedType( + "expected string struct value".to_string(), + )); + } + value + } + _ => { + return Err(CodegenError::UnsupportedType( + "expected string value".to_string(), + )); + } + }, + AirType::Enum(ref enum_name) => { + return self.generate_enum_print(enum_name, &args[0], value, newline, expected_ret); + } + _ => { + return Err(CodegenError::UnsupportedType(format!( + "print/println does not support type {:?}", + arg_type + ))); + } + }; + + let (ptr, len) = if string_value.is_struct_value() { + self.string_parts_from_value(string_value.into_struct_value())? 
+ } else { + return Err(CodegenError::LlvmError( + "to_string did not return struct".to_string(), + )); + }; + + let write_fn = self.ensure_write_function(); + self.builder + .build_call(write_fn, &[ptr.into(), len.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + if newline { + let (nl_ptr, nl_len) = self.global_string_ptr_len("\n")?; + let nl_len = self.context.i64_type().const_int(nl_len, false); + self.builder + .build_call(write_fn, &[nl_ptr.into(), nl_len.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } + + // print/println is semantically void but sema infers Dynamic → I64 for its return, + // so the AIR may emit Rvalue::Call (not CallVoid). We return const_zero() here because + // erroring would break normal println("hi") calls. The real fix is in sema: type + // bootstrap builtins as void so the AIR always emits CallVoid + match expected_ret { + None | Some(AirType::Void) => Ok(None), + Some(ret) => Ok(Some( + air_basic_type_to_llvm(ret, self.context)?.const_zero(), + )), + } + } + + /// Check if a callee uses sret convention on the current target. + /// Only C-convention functions with struct-like returns need this. + fn callee_needs_sret(&self, callee: &Callee) -> bool { + let is_windows = self.target_is_windows(); + match callee { + Callee::Direct(id) => self + .program + .functions + .iter() + .find(|f| f.id == *id) + .map_or(false, |f| { + needs_sret(&f.ret_ty, f.calling_conv, is_windows, self.program) + }), + Callee::Extern(name, _) => self + .program + .functions + .iter() + .find(|f| f.name == *name && f.is_extern) + .map_or(false, |f| { + needs_sret(&f.ret_ty, f.calling_conv, is_windows, self.program) + }), + _ => false, + } + } + + /// Returns true if the local holds an Aelys-convention fat pointer. + fn is_aelys_convention_fnptr(&self, local: LocalId) -> bool { + matches!( + self.local_air_type(local), + Ok(AirType::FnPtr { conv: aelys_air::CallingConv::Aelys, .. 
}) + ) + } + + /// Returns true if a direct/named callee has an implicit env param. + fn callee_has_implicit_env(&self, callee: &Callee) -> bool { + match callee { + Callee::Direct(id) => self + .program + .functions + .iter() + .find(|f| f.id == *id) + .map_or(false, |f| function_has_implicit_env(f)), + Callee::Named(name) => { + // Ad-hoc / builtin / extern functions don't get env + self.program + .functions + .iter() + .find(|f| f.name == *name) + .map_or(false, |f| function_has_implicit_env(f)) + } + _ => false, + } + } + + fn fn_ptr_signature_for_local( + &self, + local: LocalId, + ) -> Result<(FunctionType<'static>, u32, Option), CodegenError> { + match self.local_air_type(local)? { + AirType::FnPtr { params, ret, conv } => { + let is_aelys = matches!(conv, aelys_air::CallingConv::Aelys); + let use_sret = + needs_sret(ret.as_ref(), *conv, self.target_is_windows(), self.program); + let extra = usize::from(use_sret) + usize::from(is_aelys); + let mut param_types = Vec::with_capacity(params.len() + extra); + if use_sret { + param_types.push(self.context.ptr_type(inkwell::AddressSpace::default()).into()); + } + if is_aelys { + // Implicit env_ptr parameter + param_types.push(self.context.ptr_type(inkwell::AddressSpace::default()).into()); + } + for param in params { + param_types.push(air_basic_type_to_llvm(param, self.context)?.into()); + } + + let fn_ty = match ret.as_ref() { + AirType::Void => self.context.void_type().fn_type(¶m_types, false), + other => { + if use_sret { + self.context.void_type().fn_type(¶m_types, false) + } else { + air_basic_type_to_llvm(other, self.context)?.fn_type(¶m_types, false) + } + } + }; + Ok(( + fn_ty, + llvm_calling_convention(*conv), + use_sret.then(|| ret.as_ref().clone()), + )) + } + other => Err(CodegenError::UnsupportedType(format!( + "local {} is not fn ptr: {:?}", + local.0, other + ))), + } + } + + fn generate_enum_print( + &mut self, + enum_name: &str, + _arg: &Operand, + value: BasicValueEnum<'static>, + newline: bool, 
+ expected_ret: Option<&AirType>, + ) -> Result>, CodegenError> { + let enum_def = self + .program + .enums + .iter() + .find(|e| e.name == enum_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown enum for print: {}", enum_name)) + })? + .clone(); + + // Use the original enum name for display (strip __mono_ prefix) + let display_name = if let Some(rest) = enum_name.strip_prefix("__mono_") { + rest.split('_').next().unwrap_or(rest) + } else { + enum_name + }; + + let is_data = enum_has_data(&enum_def); + + // Save the entry block (where the tag computation happens) + let entry_bb = self.builder.get_insert_block().ok_or_else(|| { + CodegenError::LlvmError("no current block".to_string()) + })?; + + // Extract the i32 tag + let tag_val = if is_data { + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = + self.context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown enum struct type: {}", + enum_struct_name + )) + })?; + let tmp = self + .builder + .build_alloca(enum_ty, "print_enum_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, enum_ty.into())?; + self.store_value(tmp, value)?; + let tag_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 0, "print_tag_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.load_value(self.context.i32_type().into(), tag_ptr, "print_tag")? 
+ .into_int_value() + } else { + value.into_int_value() + }; + + let current_fn = self.function; + let write_fn = self.ensure_write_function(); + + // Create blocks: one per variant + default + merge + let merge_bb = self + .context + .append_basic_block(current_fn, "print_enum_merge"); + let default_bb = self + .context + .append_basic_block(current_fn, "print_enum_default"); + + let mut variant_blocks = Vec::new(); + for variant in &enum_def.variants { + let bb = self + .context + .append_basic_block(current_fn, &format!("print_{}", variant.name)); + variant_blocks.push((variant.tag, variant.name.clone(), bb)); + } + + // Build default block (fallthrough to merge) + self.builder.position_at_end(default_bb); + self.builder + .build_unconditional_branch(merge_bb) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + // Build each variant block: write "EnumName::VariantName", branch to merge + for &(_, ref name, bb) in &variant_blocks { + self.builder.position_at_end(bb); + let text = format!("{}::{}", display_name, name); + let (ptr, str_len) = self.global_string_ptr_len(&text)?; + let len_val = self.context.i64_type().const_int(str_len, false); + self.builder + .build_call(write_fn, &[ptr.into(), len_val.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_unconditional_branch(merge_bb) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } + + // Go back to entry block and build the switch terminator + self.builder.position_at_end(entry_bb); + let cases: Vec<_> = variant_blocks + .iter() + .map(|&(tag, _, bb)| (self.context.i32_type().const_int(tag as u64, false), bb)) + .collect(); + self.builder + .build_switch(tag_val, default_bb, &cases) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + // Continue in merge block + self.builder.position_at_end(merge_bb); + + if newline { + let (nl_ptr, nl_len) = self.global_string_ptr_len("\n")?; + let nl_len = self.context.i64_type().const_int(nl_len, false); + 
self.builder + .build_call(write_fn, &[nl_ptr.into(), nl_len.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } + + match expected_ret { + None | Some(AirType::Void) => Ok(None), + Some(ret) => Ok(Some( + air_basic_type_to_llvm(ret, self.context)?.const_zero(), + )), + } + } +} diff --git a/codegen/src/lowering/casts.rs b/codegen/src/lowering/casts.rs new file mode 100644 index 0000000..36ca92b --- /dev/null +++ b/codegen/src/lowering/casts.rs @@ -0,0 +1,160 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use aelys_air::{AirType, Operand}; +use inkwell::context::Context; +use inkwell::types::{FloatType, IntType}; +use inkwell::values::BasicValueEnum; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_cast( + &mut self, + operand: &Operand, + from: &AirType, + to: &AirType, + ) -> Result, CodegenError> { + let value = self.generate_operand(operand)?; + + if from == to { + return Ok(value); + } + + if let (Some((from_bits, from_signed)), Some((to_bits, _))) = (int_info(from), int_info(to)) + { + let target_ty = int_type_for_air(self.context, to)?; + if from_bits > to_bits { + return self + .builder + .build_int_truncate(value.into_int_value(), target_ty, "trunc") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + if from_bits < to_bits { + if from_signed { + return self + .builder + .build_int_s_extend(value.into_int_value(), target_ty, "sext") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + return self + .builder + .build_int_z_extend(value.into_int_value(), target_ty, "zext") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + return Ok(value); + } + + if let (Some(from_bits), Some(to_bits)) = (float_info(from), float_info(to)) { + let target_ty = float_type_for_air(self.context, to)?; + if from_bits > to_bits { + return self + .builder + .build_float_trunc(value.into_float_value(), target_ty, "fptrunc") + 
.map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + if from_bits < to_bits { + return self + .builder + .build_float_ext(value.into_float_value(), target_ty, "fpext") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + return Ok(value); + } + + if float_info(from).is_some() && int_info(to).is_some() { + let target_ty = int_type_for_air(self.context, to)?; + let (_, to_signed) = int_info(to).unwrap(); + return if to_signed { + self.builder + .build_float_to_signed_int(value.into_float_value(), target_ty, "fptosi") + .map(Into::into) + } else { + self.builder + .build_float_to_unsigned_int(value.into_float_value(), target_ty, "fptoui") + .map(Into::into) + } + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + if int_info(from).is_some() && float_info(to).is_some() { + let target_ty = float_type_for_air(self.context, to)?; + let (_, from_signed) = int_info(from).unwrap(); + return if from_signed { + self.builder + .build_signed_int_to_float(value.into_int_value(), target_ty, "sitofp") + .map(Into::into) + } else { + self.builder + .build_unsigned_int_to_float(value.into_int_value(), target_ty, "uitofp") + .map(Into::into) + } + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + Err(CodegenError::UnsupportedInstruction(format!( + "unsupported cast {:?} -> {:?}", + from, to + ))) + } +} + +fn int_type_for_air( + context: &'static Context, + ty: &AirType, +) -> Result, CodegenError> { + match ty { + AirType::I8 | AirType::U8 => Ok(context.i8_type()), + AirType::I16 | AirType::U16 => Ok(context.i16_type()), + AirType::I32 | AirType::U32 => Ok(context.i32_type()), + AirType::I64 | AirType::U64 => Ok(context.i64_type()), + AirType::Bool => Ok(context.bool_type()), + _ => Err(CodegenError::UnsupportedType(format!( + "expected int type, got {:?}", + ty + ))), + } +} + +fn float_type_for_air( + context: &'static Context, + ty: &AirType, +) -> Result, CodegenError> { + match ty { + AirType::F32 
=> Ok(context.f32_type()), + AirType::F64 => Ok(context.f64_type()), + _ => Err(CodegenError::UnsupportedType(format!( + "expected float type, got {:?}", + ty + ))), + } +} + +fn int_info(ty: &AirType) -> Option<(u32, bool)> { + match ty { + AirType::I8 => Some((8, true)), + AirType::I16 => Some((16, true)), + AirType::I32 => Some((32, true)), + AirType::I64 => Some((64, true)), + AirType::U8 => Some((8, false)), + AirType::U16 => Some((16, false)), + AirType::U32 => Some((32, false)), + AirType::U64 => Some((64, false)), + AirType::Bool => Some((1, false)), + _ => None, + } +} + +fn float_info(ty: &AirType) -> Option { + match ty { + AirType::F32 => Some(32), + AirType::F64 => Some(64), + _ => None, + } +} diff --git a/codegen/src/lowering/functions.rs b/codegen/src/lowering/functions.rs new file mode 100644 index 0000000..8f8867d --- /dev/null +++ b/codegen/src/lowering/functions.rs @@ -0,0 +1,399 @@ +use crate::CodegenContext; +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::{air_basic_type_to_llvm, air_type_to_llvm}; +use crate::{is_reserved_bootstrap_builtin, reserved_bootstrap_builtin_message}; +use aelys_air::{ + layout::enum_has_data, + AirFunction, AirProgram, AirType, CallingConv as AirCallingConv, FunctionAttribs, InlineHint, +}; +use inkwell::AddressSpace; +use inkwell::attributes::{Attribute, AttributeLoc}; +use inkwell::llvm_sys::LLVMCallConv; +use inkwell::types::{BasicMetadataTypeEnum, BasicType, FunctionType}; +use inkwell::values::FunctionValue; +use std::collections::HashMap; + +// Calling convention for Aelys function values +// +// All Aelys-convention function values (closures, lambdas, named functions used as values) share a uniform fat pointer representation: +// +// { fn_ptr, env_ptr }. +// +// Non-capturing forms have env_ptr = null. 
C-convention FnPtrs stay as bare +// pointers; the C ABI has no notion of an env, and extern functions don't +// need one +// +// To make indirect calls uniform, every non-extern Aelys function receives an +// implicit `env: ptr` as its first LLVM parameter. +// +// Named functions ignore it (callers pass null); closure bodies use it to access captured values. +// The cost is zero under fastcc: it's just an unused register not a stack push +// +// Exception: closure functions already have `__env` as an explicit AIR-level parameter (added during lower_closure). +// These do not get the implicit env on top; function_has_implicit_env excludes them. +// +// At LLVM level, both forms end up with env at param 0, which is what indirect callers expect. +// +// The entry wrapper (__aelys_user_main) bridges from C convention to the Aelys main function by passing null as the env argument. + +const USER_MAIN_SYMBOL: &str = "__aelys_main"; +const NATIVE_ENTRY_SYMBOL: &str = "__aelys_user_main"; + +impl CodegenContext { + pub(crate) fn declare_functions(&self, program: &AirProgram) -> Result<(), CodegenError> { + self.ensure_no_reserved_bootstrap_builtins(program)?; + + for function in &program.functions { + // Any C-convention entry point crosses the platform ABI boundary, even when + // the function body lives in this module and is called via a fnptr later. 
+ if matches!(function.calling_conv, AirCallingConv::C) { + reject_struct_abi_on_c_function(function, program)?; + } + + let symbol_name = function_symbol_name(function); + let fn_type = self.function_type(function, program)?; + let fn_value = if let Some(existing) = self.module.get_function(&symbol_name) { + existing + } else { + self.module.add_function(&symbol_name, fn_type, None) + }; + + fn_value.set_call_conventions(llvm_calling_convention(function.calling_conv)); + self.apply_function_attributes(fn_value, &function.attributes)?; + + if needs_sret( + &function.ret_ty, + function.calling_conv, + self.target_is_windows(), + program, + ) { + let ret_any_ty = air_type_to_llvm(&function.ret_ty, self.context)?; + let sret_attr = self + .context + .create_type_attribute(Attribute::get_named_enum_kind_id("sret"), ret_any_ty); + fn_value.add_attribute(AttributeLoc::Param(0), sret_attr); + } + } + + Ok(()) + } + + fn ensure_no_reserved_bootstrap_builtins( + &self, + program: &AirProgram, + ) -> Result<(), CodegenError> { + if let Some(function) = program + .functions + .iter() + .find(|function| is_reserved_bootstrap_builtin(&function.name)) + { + return Err(CodegenError::UnsupportedInstruction( + reserved_bootstrap_builtin_message(&function.name), + )); + } + Ok(()) + } + + pub(crate) fn define_function_bodies(&self, program: &AirProgram) -> Result<(), CodegenError> { + let mut function_names = HashMap::with_capacity(program.functions.len()); + for function in &program.functions { + function_names.insert(function.id, function_symbol_name(function)); + } + + for function in &program.functions { + if function.is_extern { + continue; + } + + let symbol_name = function_symbol_name(function); + let fn_value = self.module.get_function(&symbol_name).ok_or_else(|| { + CodegenError::LlvmError(format!( + "missing declared LLVM function for {}", + function.name + )) + })?; + + let mut fcx = FunctionCodegen::new( + self.context, + &self.module, + fn_value, + function, + program, 
+ &function_names, + ); + fcx.generate()?; + } + + Ok(()) + } + + pub(crate) fn emit_entry_wrapper(&self, program: &AirProgram) -> Result<(), CodegenError> { + let Some(user_main) = program + .functions + .iter() + .find(|function| !function.is_extern && function.name == "main") + else { + return Ok(()); + }; + + if !user_main.params.is_empty() { + return Err(CodegenError::InvalidNativeEntry(format!( + "main must have no parameters (found {})", + user_main.params.len() + ))); + } + + if !matches!(user_main.ret_ty, AirType::Void | AirType::I64) { + return Err(CodegenError::InvalidNativeEntry(format!( + "main return type must be void or i64 (found {})", + native_entry_type_name(&user_main.ret_ty) + ))); + } + + if self.module.get_function(NATIVE_ENTRY_SYMBOL).is_some() { + return Err(CodegenError::InvalidNativeEntry(format!( + "symbol '{}' is reserved by the native runtime", + NATIVE_ENTRY_SYMBOL + ))); + } + + let user_symbol = function_symbol_name(user_main); + let user_fn = self + .module + .get_function(&user_symbol) + .ok_or_else(|| CodegenError::LlvmError(format!("missing function {}", user_symbol)))?; + + let wrapper_ty = self.context.i64_type().fn_type(&[], false); + let wrapper = self + .module + .add_function(NATIVE_ENTRY_SYMBOL, wrapper_ty, None); + wrapper.set_call_conventions(llvm_calling_convention(AirCallingConv::C)); + + let builder = self.context.create_builder(); + let entry = self.context.append_basic_block(wrapper, "entry"); + builder.position_at_end(entry); + // __aelys_main has an implicit env parameter; pass null. 
+ let null_env = self + .context + .ptr_type(AddressSpace::default()) + .const_null(); + let call = builder + .build_call(user_fn, &[null_env.into()], "user_main") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(user_fn.get_call_conventions()); + + let return_value = if matches!(user_main.ret_ty, AirType::Void) { + self.context.i64_type().const_zero() + } else { + call.try_as_basic_value() + .basic() + .ok_or_else(|| { + CodegenError::LlvmError(format!( + "function {} returned void for i64 native entry", + user_symbol + )) + })? + .into_int_value() + }; + + builder + .build_return(Some(&return_value)) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + Ok(()) + } + + fn function_type( + &self, + function: &AirFunction, + program: &AirProgram, + ) -> Result, CodegenError> { + let use_sret = needs_sret( + &function.ret_ty, + function.calling_conv, + self.target_is_windows(), + program, + ); + let has_implicit_env = function_has_implicit_env(function); + let extra = usize::from(use_sret) + usize::from(has_implicit_env); + let mut params = Vec::with_capacity(function.params.len() + extra); + if use_sret { + params.push(self.context.ptr_type(AddressSpace::default()).into()); + } + if has_implicit_env { + // Implicit env pointer (ptr) as first user-visible param + params.push(self.context.ptr_type(AddressSpace::default()).into()); + } + for param in &function.params { + let param_ty: BasicMetadataTypeEnum<'static> = + air_basic_type_to_llvm(¶m.ty, self.context)?.into(); + params.push(param_ty); + } + + if matches!(function.ret_ty, AirType::Void) || use_sret { + return Ok(self.context.void_type().fn_type(¶ms, false)); + } + + Ok(air_basic_type_to_llvm(&function.ret_ty, self.context)?.fn_type(¶ms, false)) + } + + fn apply_function_attributes( + &self, + function: FunctionValue<'static>, + attrs: &FunctionAttribs, + ) -> Result<(), CodegenError> { + match attrs.inline { + InlineHint::Default => {} + InlineHint::Always => 
self.add_function_attribute(function, "alwaysinline")?, + InlineHint::Never => self.add_function_attribute(function, "noinline")?, + } + + if attrs.no_unwind { + self.add_function_attribute(function, "nounwind")?; + } + + if attrs.cold { + self.add_function_attribute(function, "cold")?; + } + + Ok(()) + } + + fn add_function_attribute( + &self, + function: FunctionValue<'static>, + attribute_name: &str, + ) -> Result<(), CodegenError> { + let kind_id = Attribute::get_named_enum_kind_id(attribute_name); + if kind_id == 0 { + return Err(CodegenError::LlvmError(format!( + "unknown LLVM function attribute: {}", + attribute_name + ))); + } + + let attr = self.context.create_enum_attribute(kind_id, 0); + function.add_attribute(AttributeLoc::Function, attr); + Ok(()) + } +} + +/// Returns true if a function gets an implicit `env: ptr` prepended to its +/// LLVM parameter list. +/// +/// This is every non-extern Aelys-convention function that doesn't already have `__env` as its first AIR parameter (closures) +/// +/// The distinction matters :: +/// +/// named functions get env added here (codegen-only, invisible in AIR), while closures already have it in their AIR param list +/// (added by lower_closure). +/// +/// Both end up with env at LLVM param 0. Without this check, closures would get env twice and indirect calls would pass the wrong number of arguments. +pub(crate) fn function_has_implicit_env(function: &AirFunction) -> bool { + if function.is_extern || !matches!(function.calling_conv, AirCallingConv::Aelys) { + return false; + } + // Closures already declare __env as their first AIR param. 
+ !function + .params + .first() + .is_some_and(|p| p.name == "__env") +} + +pub(crate) fn llvm_calling_convention(conv: AirCallingConv) -> u32 { + match conv { + AirCallingConv::Aelys => LLVMCallConv::LLVMFastCallConv as u32, + AirCallingConv::C => LLVMCallConv::LLVMCCallConv as u32, + AirCallingConv::Rust => LLVMCallConv::LLVMCCallConv as u32, + } +} + +pub(crate) fn function_symbol_name(function: &AirFunction) -> String { + if !function.is_extern && function.name == "main" { + USER_MAIN_SYMBOL.to_string() + } else { + function.name.clone() + } +} + +/// Returns true if the type lowers to an aggregate that we must not pass +/// by value across the C ABI on Windows/MSVC. +/// Data enums count here because LLVM sees them as structs, not i32 tags. +pub(crate) fn is_abi_unsafe_type(ty: &AirType, program: &AirProgram) -> bool { + matches!( + ty, + AirType::Str | AirType::Struct(_) | AirType::Slice(_) | AirType::Array(_, _) + ) || matches!(ty, AirType::Enum(name) if program + .enums + .iter() + .find(|def| def.name == *name) + .is_some_and(enum_has_data)) +} + +/// True when a function with this return type + calling convention needs sret +/// on the current target. 
Only C-convention functions need sret because +/// fastcc (Aelys-internal) is handled consistently by LLVM itself +pub(crate) fn needs_sret( + ret_ty: &AirType, + conv: AirCallingConv, + is_windows: bool, + program: &AirProgram, +) -> bool { + is_windows && matches!(conv, AirCallingConv::C) && is_abi_unsafe_type(ret_ty, program) +} + +fn reject_struct_abi_on_c_function( + function: &AirFunction, + program: &AirProgram, +) -> Result<(), CodegenError> { + for param in &function.params { + if matches!(¶m.ty, AirType::Enum(name) if program + .enums + .iter() + .find(|def| def.name == *name) + .is_some_and(enum_has_data)) + { + return Err(CodegenError::UnsupportedType(format!( + "C-convention function '{}' has enum parameter '{}' (type {:?}), \ + data enums must not cross the C ABI by value", + function.name, param.name, param.ty + ))); + } + if is_abi_unsafe_type(¶m.ty, program) { + return Err(CodegenError::UnsupportedType(format!( + "C-convention function '{}' has struct parameter '{}' (type {:?}), \ + struct params must be flattened to scalars for C ABI compatibility", + function.name, param.name, param.ty + ))); + } + } + // struct returns are handled via sret in function_type() + declare_functions() + Ok(()) +} + +fn native_entry_type_name(ty: &AirType) -> &'static str { + match ty { + AirType::I8 => "i8", + AirType::I16 => "i16", + AirType::I32 => "i32", + AirType::I64 => "i64", + AirType::U8 => "u8", + AirType::U16 => "u16", + AirType::U32 => "u32", + AirType::U64 => "u64", + AirType::F32 => "f32", + AirType::F64 => "f64", + AirType::Bool => "bool", + AirType::Str => "string", + AirType::Ptr(_) => "ptr", + AirType::Struct(_) => "struct", + AirType::Enum(_) => "enum", + AirType::Array(_, _) => "array", + AirType::Slice(_) => "slice", + AirType::FnPtr { .. 
} => "fn", + AirType::Param(_) => "param", + AirType::Opaque => "opaque", + AirType::Void => "void", + } +} diff --git a/codegen/src/lowering/globals.rs b/codegen/src/lowering/globals.rs new file mode 100644 index 0000000..ee22c29 --- /dev/null +++ b/codegen/src/lowering/globals.rs @@ -0,0 +1,770 @@ +use crate::CodegenContext; +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::lowering::functions::function_symbol_name; +use crate::types::{aelys_string_type, air_basic_type_to_llvm, closure_fat_ptr_type}; +use aelys_air::{ + AirConst, AirEnumDef, AirGlobal, AirProgram, AirType, Operand, + layout::{enum_has_data, enum_max_payload_size, resolved_layout}, +}; +use inkwell::AddressSpace; +use inkwell::module::Linkage; +use inkwell::values::{BasicValueEnum, IntValue, PointerValue}; + +pub(crate) const GLOBAL_GET_PREFIX: &str = "__aelys_global_get_"; +pub(crate) const GLOBAL_SET_PREFIX: &str = "__aelys_global_set_"; +const GLOBAL_STORAGE_PREFIX: &str = "__aelys_global_"; + +pub(crate) fn global_storage_name(name: &str) -> String { + format!("{GLOBAL_STORAGE_PREFIX}{name}") +} + +impl CodegenContext { + pub(crate) fn declare_globals(&self, program: &AirProgram) -> Result<(), CodegenError> { + for global in &program.globals { + let llvm_ty = air_basic_type_to_llvm(&global.ty, self.context)?; + let symbol = global_storage_name(&global.name); + let global_value = if let Some(existing) = self.module.get_global(&symbol) { + existing + } else { + self.module.add_global(llvm_ty, None, &symbol) + }; + global_value.set_linkage(Linkage::Internal); + + let init = self.global_initializer(global, program)?; + global_value.set_initializer(&init); + } + + Ok(()) + } + + fn global_initializer( + &self, + global: &AirGlobal, + program: &AirProgram, + ) -> Result, CodegenError> { + let Some(init) = global.init.as_ref() else { + return Err(CodegenError::UnsupportedInstruction(format!( + "global '{}' requires a compile-time constant initializer", + global.name + 
))); + }; + + match init { + AirConst::IntLiteral(value) => self.int_initializer(&global.ty, *value, program), + AirConst::Int(value, _) => self.int_initializer(&global.ty, *value, program), + AirConst::Float(value, _) => self.float_initializer(&global.ty, *value), + AirConst::Bool(value) => { + if !matches!(global.ty, AirType::Bool) { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' has bool initializer but non-bool type {:?}", + global.name, global.ty + ))); + } + Ok(self + .context + .bool_type() + .const_int(u64::from(*value), false) + .into()) + } + AirConst::Str(text) => self.string_initializer(&global.name, &global.ty, text), + AirConst::Null => match global.ty { + AirType::Ptr(_) => Ok(self + .context + .ptr_type(AddressSpace::default()) + .const_null() + .into()), + _ => Err(CodegenError::UnsupportedType(format!( + "global '{}' uses null initializer with non-pointer type {:?}", + global.name, global.ty + ))), + }, + AirConst::FnRef(name) => self.fnref_initializer(&global.name, &global.ty, name, program), + AirConst::Enum { + enum_name, + tag, + payload, + } => self.enum_initializer(&global.name, &global.ty, enum_name, *tag, payload, program), + AirConst::ZeroInit(ty) if *ty == global.ty => { + Ok(air_basic_type_to_llvm(ty, self.context)?.const_zero()) + } + AirConst::ZeroInit(ty) => Err(CodegenError::UnsupportedType(format!( + "global '{}' has mismatched zeroinit type {:?} for {:?}", + global.name, ty, global.ty + ))), + AirConst::Array(elems) => self.array_initializer(&global.name, &global.ty, elems, program), + AirConst::Struct { name: struct_name, fields } => { + self.struct_initializer(&global.name, &global.ty, struct_name, fields, program) + } + other => Err(CodegenError::UnsupportedInstruction(format!( + "global '{}' has unsupported initializer kind {}", + global.name, + crate::lowering::operands::constant_kind_name(other) + ))), + } + } + + fn array_initializer( + &self, + global_name: &str, + ty: &AirType, + elems: &[AirConst], + 
program: &AirProgram, + ) -> Result, CodegenError> { + let AirType::Array(elem_ty, _) = ty else { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' has Array initializer but non-array type {:?}", + global_name, ty + ))); + }; + let elem_llvm_ty = air_basic_type_to_llvm(elem_ty, self.context)?; + // Build a temporary AirGlobal for each element so we can reuse + // the existing scalar initializer paths. + let elem_consts: Result>, _> = elems + .iter() + .map(|c| { + let elem_global = AirGlobal { + name: global_name.to_string(), + ty: (**elem_ty).clone(), + init: Some(c.clone()), + gc_mode: aelys_air::GcMode::Manual, + span: None, + }; + self.global_initializer(&elem_global, program) + }) + .collect(); + let elem_values = elem_consts?; + + // Build the LLVM const array for the element type. + let const_arr: BasicValueEnum<'static> = match elem_llvm_ty { + inkwell::types::BasicTypeEnum::IntType(t) => { + let vals: Vec<_> = elem_values + .iter() + .map(|v| v.into_int_value()) + .collect(); + t.const_array(&vals).into() + } + inkwell::types::BasicTypeEnum::FloatType(t) => { + let vals: Vec<_> = elem_values + .iter() + .map(|v| v.into_float_value()) + .collect(); + t.const_array(&vals).into() + } + inkwell::types::BasicTypeEnum::PointerType(t) => { + let vals: Vec<_> = elem_values + .iter() + .map(|v| v.into_pointer_value()) + .collect(); + t.const_array(&vals).into() + } + inkwell::types::BasicTypeEnum::StructType(t) => { + let vals: Vec<_> = elem_values + .iter() + .map(|v| v.into_struct_value()) + .collect(); + t.const_array(&vals).into() + } + inkwell::types::BasicTypeEnum::ArrayType(t) => { + let vals: Vec<_> = elem_values + .iter() + .map(|v| v.into_array_value()) + .collect(); + t.const_array(&vals).into() + } + other => { + return Err(CodegenError::UnsupportedType(format!( + "global array '{}' has unsupported element type {:?}", + global_name, other + ))); + } + }; + Ok(const_arr) + } + + fn struct_initializer( + &self, + global_name: &str, + ty: 
&AirType, + struct_name: &str, + fields: &[(String, AirConst)], + program: &AirProgram, + ) -> Result, CodegenError> { + let AirType::Struct(_) = ty else { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' has Struct initializer but type is {:?}", + global_name, ty + ))); + }; + let llvm_struct_ty = self.context.get_struct_type(struct_name).ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "global '{}': struct type '{}' not declared", + global_name, struct_name + )) + })?; + // Look up the canonical field order from the AIR program. + let struct_def = program + .structs + .iter() + .find(|s| s.name == struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "global '{}': struct def '{}' not found in program", + global_name, struct_name + )) + })?; + // Build field values in canonical order. + let mut field_values: Vec> = Vec::with_capacity(struct_def.fields.len()); + for struct_field in &struct_def.fields { + let (_, field_const) = fields + .iter() + .find(|(fname, _)| fname == &struct_field.name) + .ok_or_else(|| { + CodegenError::UnsupportedInstruction(format!( + "global '{}': missing field '{}' in struct initializer", + global_name, struct_field.name + )) + })?; + let field_global = AirGlobal { + name: format!("{}_{}", global_name, struct_field.name), + ty: struct_field.ty.clone(), + init: Some(field_const.clone()), + gc_mode: aelys_air::GcMode::Manual, + span: None, + }; + field_values.push(self.global_initializer(&field_global, program)?); + } + Ok(llvm_struct_ty.const_named_struct(&field_values).into()) + } + + fn int_initializer( + &self, + ty: &AirType, + value: i64, + program: &AirProgram, + ) -> Result, CodegenError> { + let const_value = match ty { + AirType::I8 => self.context.i8_type().const_int(value as u64, true).into(), + AirType::I16 => self.context.i16_type().const_int(value as u64, true).into(), + AirType::I32 => self.context.i32_type().const_int(value as u64, true).into(), + AirType::I64 => 
self.context.i64_type().const_int(value as u64, true).into(), + AirType::U8 => self.context.i8_type().const_int(value as u64, false).into(), + AirType::U16 => self.context.i16_type().const_int(value as u64, false).into(), + AirType::U32 => self.context.i32_type().const_int(value as u64, false).into(), + AirType::U64 => self.context.i64_type().const_int(value as u64, false).into(), + AirType::Enum(name) => self.enum_int_initializer(name, value, program)?, + other => { + return Err(CodegenError::UnsupportedType(format!( + "integer global initializer is not supported for {:?}", + other + ))); + } + }; + Ok(const_value) + } + + fn enum_int_initializer( + &self, + name: &str, + value: i64, + program: &AirProgram, + ) -> Result, CodegenError> { + let enum_struct_name = format!("__aelys_enum_{}", name); + if self.context.get_struct_type(&enum_struct_name).is_none() { + return Ok(self.context.i32_type().const_int(value as u64, false).into()); + } + + let enum_def = program + .enums + .iter() + .find(|def| def.name == name) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown enum type {:?}", name)))?; + if !enum_has_data(enum_def) { + return Ok(self.context.i32_type().const_int(value as u64, false).into()); + } + + let tag = u32::try_from(value).map_err(|_| { + CodegenError::UnsupportedType(format!( + "enum global initializer tag {value} is out of range for {name}" + )) + })?; + let variant = enum_def + .variants + .iter() + .find(|variant| variant.tag == tag) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "enum global initializer tag {tag} does not exist on {name}" + )) + })?; + if !variant.payload.is_empty() { + return Err(CodegenError::UnsupportedInstruction(format!( + "global enum '{}' needs payload data for variant '{}'", + name, variant.name + ))); + } + + // Unit variants in data enums still use aggregate storage, so keep the + // payload byte array zeroed while materializing the tag as a constant. 
+ let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown enum struct type: {}", enum_struct_name)) + })?; + let payload_len = enum_max_payload_size(enum_def, &program.struct_sizes); + let payload = self + .context + .i8_type() + .array_type(payload_len) + .const_zero(); + Ok(enum_ty + .const_named_struct(&[ + self.context.i32_type().const_int(tag as u64, false).into(), + payload.into(), + ]) + .into()) + } + + fn enum_initializer( + &self, + global_name: &str, + ty: &AirType, + enum_name: &str, + tag: u32, + payload: &[AirConst], + program: &AirProgram, + ) -> Result, CodegenError> { + let AirType::Enum(global_enum_name) = ty else { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' uses enum initializer with non-enum type {:?}", + global_name, ty + ))); + }; + if global_enum_name != enum_name { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' enum initializer name '{}' does not match declared type '{}'", + global_name, enum_name, global_enum_name + ))); + } + + let enum_def = program + .enums + .iter() + .find(|def| def.name == enum_name) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown enum type {:?}", enum_name)))?; + if !enum_has_data(enum_def) { + if !payload.is_empty() { + return Err(CodegenError::UnsupportedInstruction(format!( + "simple enum '{}' cannot carry payload data in global '{}'", + enum_name, global_name + ))); + } + return Ok(self.context.i32_type().const_int(tag as u64, false).into()); + } + + let variant = enum_def + .variants + .iter() + .find(|variant| variant.tag == tag) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "enum global initializer tag {tag} does not exist on {enum_name}" + )) + })?; + if payload.len() != variant.payload.len() { + return Err(CodegenError::UnsupportedInstruction(format!( + "global '{}' enum variant '{}' expected {} payload fields, found {}", + global_name, + variant.name, + 
variant.payload.len(), + payload.len() + ))); + } + + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown enum struct type: {}", enum_struct_name)) + })?; + let payload_len = enum_max_payload_size(enum_def, &program.struct_sizes); + let payload_bytes = + self.enum_payload_initializer_bytes(global_name, enum_def, variant, payload, program)?; + let payload = self.context.i8_type().const_array(&payload_bytes); + debug_assert_eq!(payload_len as usize, payload_bytes.len()); + Ok(enum_ty + .const_named_struct(&[ + self.context.i32_type().const_int(tag as u64, false).into(), + payload.into(), + ]) + .into()) + } + + fn float_initializer( + &self, + ty: &AirType, + value: f64, + ) -> Result, CodegenError> { + let const_value = match ty { + AirType::F32 => self.context.f32_type().const_float(value).into(), + AirType::F64 => self.context.f64_type().const_float(value).into(), + other => { + return Err(CodegenError::UnsupportedType(format!( + "float global initializer is not supported for {:?}", + other + ))); + } + }; + Ok(const_value) + } + + fn string_initializer( + &self, + name: &str, + ty: &AirType, + text: &str, + ) -> Result, CodegenError> { + if !matches!(ty, AirType::Str) { + return Err(CodegenError::UnsupportedType(format!( + "string global initializer is not supported for {:?}", + ty + ))); + } + + let bytes = text.as_bytes(); + let array_len = u32::try_from(bytes.len() + 1).map_err(|_| { + CodegenError::UnsupportedType(format!("string global '{}' is too large", name)) + })?; + let array_ty = self.context.i8_type().array_type(array_len); + let storage_name = format!("{}{}_bytes", GLOBAL_STORAGE_PREFIX, name); + let backing = if let Some(existing) = self.module.get_global(&storage_name) { + existing + } else { + let global_value = self.module.add_global(array_ty, None, &storage_name); + 
global_value.set_linkage(Linkage::Private); + global_value.set_constant(true); + let mut nul_terminated = bytes.to_vec(); + nul_terminated.push(0); + let byte_values: Vec<_> = nul_terminated + .iter() + .map(|byte| self.context.i8_type().const_int(*byte as u64, false)) + .collect(); + global_value.set_initializer(&self.context.i8_type().const_array(&byte_values)); + global_value + }; + + let zero = self.context.i64_type().const_zero(); + let ptr = unsafe { + backing + .as_pointer_value() + .const_in_bounds_gep(array_ty, &[zero, zero]) + }; + let len = self.context.i64_type().const_int(bytes.len() as u64, false); + Ok(aelys_string_type(self.context) + .const_named_struct(&[ptr.into(), len.into()]) + .into()) + } + + fn fnref_initializer( + &self, + global_name: &str, + ty: &AirType, + function_name: &str, + program: &AirProgram, + ) -> Result, CodegenError> { + let conv = match ty { + AirType::FnPtr { conv, .. } => *conv, + _ => { + return Err(CodegenError::UnsupportedType(format!( + "global '{}' uses fnref initializer with non-fn type {:?}", + global_name, ty + ))); + } + }; + + let symbol_name = program + .functions + .iter() + .find(|function| function.name == function_name) + .map(function_symbol_name) + .unwrap_or_else(|| function_name.to_string()); + // Globals are emitted before bodies, so they need the declared LLVM symbol. + let func = self.module.get_function(&symbol_name).ok_or_else(|| { + CodegenError::LlvmError(format!( + "global '{}' references unknown function '{}'", + global_name, function_name + )) + })?; + let fn_ptr = func.as_global_value().as_pointer_value(); + + if matches!(conv, aelys_air::CallingConv::Aelys) { + // Aelys-convention function values are fat pointers { fn_ptr, env_ptr }. + // Named functions have no captures, so env_ptr is null. 
+ let null_env = self + .context + .ptr_type(AddressSpace::default()) + .const_null(); + let fat = closure_fat_ptr_type(self.context) + .const_named_struct(&[fn_ptr.into(), null_env.into()]); + Ok(fat.into()) + } else { + Ok(fn_ptr.into()) + } + } + + fn enum_payload_initializer_bytes( + &self, + global_name: &str, + enum_def: &AirEnumDef, + variant: &aelys_air::AirEnumVariant, + payload: &[AirConst], + program: &AirProgram, + ) -> Result>, CodegenError> { + let payload_len = enum_max_payload_size(enum_def, &program.struct_sizes); + let mut bytes = vec![self.context.i8_type().const_zero(); payload_len as usize]; + let mut byte_offset = 0u32; + + for (index, (field_ty, field_const)) in variant.payload.iter().zip(payload.iter()).enumerate() { + let field_layout = resolved_layout(field_ty, &program.struct_sizes); + byte_offset = align_to(byte_offset, field_layout.align); + + // Constant globals still store enum payloads in the raw byte array layout. + // Pack fields with the same AIR-computed offsets as runtime EnumInit. 
+ let field_bytes = self.const_bytes( + &format!("{global_name}_{}_{}", variant.name, index), + field_ty, + field_const, + program, + )?; + if field_bytes.len() != field_layout.size as usize { + return Err(CodegenError::UnsupportedInstruction(format!( + "global '{}' field {} for enum '{}' serialized to {} bytes, expected {}", + global_name, + index, + enum_def.name, + field_bytes.len(), + field_layout.size + ))); + } + let start = byte_offset as usize; + let end = start + field_bytes.len(); + bytes[start..end].clone_from_slice(&field_bytes); + byte_offset += field_layout.size; + } + + Ok(bytes) + } + + fn const_bytes( + &self, + name: &str, + ty: &AirType, + constant: &AirConst, + program: &AirProgram, + ) -> Result>, CodegenError> { + match (ty, constant) { + (AirType::I8 | AirType::U8, AirConst::Int(value, _)) + | (AirType::I8 | AirType::U8, AirConst::IntLiteral(value)) => Ok(vec![ + self.context.i8_type().const_int(*value as u64, false), + ]), + (AirType::Bool, AirConst::Bool(value)) => { + Ok(vec![self.context.i8_type().const_int(u64::from(*value), false)]) + } + ( + AirType::I16 + | AirType::I32 + | AirType::I64 + | AirType::U16 + | AirType::U32 + | AirType::U64, + AirConst::Int(value, _) | AirConst::IntLiteral(value), + ) => self.integer_bytes(ty, *value), + (AirType::F32, AirConst::Float(value, _)) => Ok(f32::to_le_bytes(*value as f32) + .into_iter() + .map(|byte| self.context.i8_type().const_int(byte as u64, false)) + .collect()), + (AirType::F64, AirConst::Float(value, _)) => Ok(f64::to_le_bytes(*value) + .into_iter() + .map(|byte| self.context.i8_type().const_int(byte as u64, false)) + .collect()), + (AirType::Ptr(_), AirConst::Null) => Ok(vec![self.context.i8_type().const_zero(); 8]), + (AirType::Enum(enum_name), AirConst::Int(value, _) | AirConst::IntLiteral(value)) => { + let tag = u32::try_from(*value).map_err(|_| { + CodegenError::UnsupportedType(format!( + "enum initializer tag {value} is out of range for {enum_name}" + )) + })?; + 
self.enum_value_bytes(name, enum_name, tag, &[], program) + } + ( + AirType::Enum(enum_name), + AirConst::Enum { + enum_name: const_enum_name, + tag, + payload, + }, + ) => { + if enum_name != const_enum_name { + return Err(CodegenError::UnsupportedType(format!( + "nested enum constant '{}' does not match expected '{}'", + const_enum_name, enum_name + ))); + } + self.enum_value_bytes(name, enum_name, *tag, payload, program) + } + _ => Err(CodegenError::UnsupportedInstruction(format!( + "global '{}' cannot serialize {} as {:?}", + name, + crate::lowering::operands::constant_kind_name(constant), + ty + ))), + } + } + + fn enum_value_bytes( + &self, + name: &str, + enum_name: &str, + tag: u32, + payload: &[AirConst], + program: &AirProgram, + ) -> Result>, CodegenError> { + let enum_def = program + .enums + .iter() + .find(|def| def.name == enum_name) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown enum type {:?}", enum_name)))?; + if !enum_has_data(enum_def) { + return self.integer_bytes(&AirType::I32, tag as i64); + } + + let layout = resolved_layout(&AirType::Enum(enum_name.to_string()), &program.struct_sizes); + let payload_align = enum_payload_align(enum_def, &program.struct_sizes); + let payload_offset = align_to(4, payload_align); + let variant = enum_def + .variants + .iter() + .find(|variant| variant.tag == tag) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "enum initializer tag {tag} does not exist on {enum_name}" + )) + })?; + if payload.len() != variant.payload.len() { + return Err(CodegenError::UnsupportedInstruction(format!( + "enum '{}' expected {} payload fields for tag {}, found {}", + enum_name, + variant.payload.len(), + tag, + payload.len() + ))); + } + + let mut bytes = vec![self.context.i8_type().const_zero(); layout.size as usize]; + let tag_bytes = self.integer_bytes(&AirType::I32, tag as i64)?; + bytes[..4].clone_from_slice(&tag_bytes); + let payload_bytes = self.enum_payload_initializer_bytes(name, enum_def, 
variant, payload, program)?; + let start = payload_offset as usize; + let end = start + payload_bytes.len(); + bytes[start..end].clone_from_slice(&payload_bytes); + Ok(bytes) + } + + fn integer_bytes( + &self, + ty: &AirType, + value: i64, + ) -> Result>, CodegenError> { + let bytes = match ty { + AirType::I8 => vec![(value as i8) as u8], + AirType::U8 => vec![value as u8], + AirType::I16 => (value as i16).to_le_bytes().to_vec(), + AirType::U16 => (value as u16).to_le_bytes().to_vec(), + AirType::I32 => (value as i32).to_le_bytes().to_vec(), + AirType::U32 => (value as u32).to_le_bytes().to_vec(), + AirType::I64 => value.to_le_bytes().to_vec(), + AirType::U64 => (value as u64).to_le_bytes().to_vec(), + other => { + return Err(CodegenError::UnsupportedType(format!( + "integer byte serialization is not supported for {:?}", + other + ))); + } + }; + Ok(bytes + .into_iter() + .map(|byte| self.context.i8_type().const_int(byte as u64, false)) + .collect()) + } +} + +fn align_to(offset: u32, align: u32) -> u32 { + (offset + align - 1) & !(align - 1) +} + +fn enum_payload_align( + def: &AirEnumDef, + sizes: &std::collections::HashMap, +) -> u32 { + def.variants + .iter() + .flat_map(|variant| variant.payload.iter()) + .map(|ty| resolved_layout(ty, sizes).align) + .max() + .unwrap_or(1) +} + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_global_get( + &mut self, + name: &str, + args: &[Operand], + ) -> Result>, CodegenError> { + if !args.is_empty() { + return Err(CodegenError::UnsupportedInstruction(format!( + "global getter '{}' does not take arguments", + name + ))); + } + + let global = self.lookup_program_global(name)?; + let ptr = self.lookup_global_ptr(name)?; + let llvm_ty = air_basic_type_to_llvm(&global.ty, self.context)?; + Ok(Some(self.load_value(llvm_ty, ptr, "global_load")?)) + } + + pub(crate) fn generate_global_set( + &mut self, + name: &str, + args: &[Operand], + ) -> Result>, CodegenError> { + if args.len() != 1 { + return 
Err(CodegenError::UnsupportedInstruction(format!( + "global setter '{}' expects exactly one argument", + name + ))); + } + + let ptr = self.lookup_global_ptr(name)?; + let value = self.generate_operand(&args[0])?; + self.store_value(ptr, value)?; + Ok(None) + } + + fn lookup_program_global(&self, name: &str) -> Result<&AirGlobal, CodegenError> { + self.program + .globals + .iter() + .find(|global| global.name == name) + .ok_or_else(|| CodegenError::UnsupportedInstruction(format!("unknown global '{}'", name))) + } + + fn lookup_global_ptr(&self, name: &str) -> Result, CodegenError> { + self.module + .get_global(&global_storage_name(name)) + .map(|global| global.as_pointer_value()) + .ok_or_else(|| CodegenError::LlvmError(format!("missing LLVM global '{}'", name))) + } +} diff --git a/codegen/src/lowering/memory.rs b/codegen/src/lowering/memory.rs new file mode 100644 index 0000000..abcfce2 --- /dev/null +++ b/codegen/src/lowering/memory.rs @@ -0,0 +1,141 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::alignment_of; +use aelys_air::AirType; +use inkwell::types::BasicTypeEnum; +use inkwell::values::{BasicValue, BasicValueEnum, PointerValue}; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn store_value( + &self, + ptr: PointerValue<'static>, + value: BasicValueEnum<'static>, + ) -> Result<(), CodegenError> { + let store = self + .builder + .build_store(ptr, value) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + store + .set_alignment(alignment_of(value.get_type())) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + + pub(crate) fn load_value( + &self, + ty: BasicTypeEnum<'static>, + ptr: PointerValue<'static>, + name: &str, + ) -> Result, CodegenError> { + let value = self + .builder + .build_load(ty, ptr, name) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + if let Some(instruction) = value.as_instruction_value() { + instruction + .set_alignment(alignment_of(ty)) + .map_err(|e| 
CodegenError::LlvmError(e.to_string()))?; + } + Ok(value) + } + + pub(crate) fn align_alloca( + &self, + ptr: PointerValue<'static>, + ty: BasicTypeEnum<'static>, + ) -> Result<(), CodegenError> { + if let Some(inst) = ptr.as_instruction() { + inst.set_alignment(alignment_of(ty)) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } + Ok(()) + } + + pub(crate) fn air_type_size(&self, ty: &AirType) -> Result { + self.type_size_align(ty).map(|(size, _)| size) + } + + fn type_size_align(&self, ty: &AirType) -> Result<(u32, u32), CodegenError> { + match ty { + AirType::I8 | AirType::U8 | AirType::Bool => Ok((1, 1)), + AirType::I16 | AirType::U16 => Ok((2, 2)), + AirType::I32 | AirType::U32 | AirType::F32 => Ok((4, 4)), + AirType::I64 | AirType::U64 | AirType::F64 => Ok((8, 8)), + AirType::Ptr(_) => Ok((8, 8)), + AirType::FnPtr { conv, .. } => { + if matches!(conv, aelys_air::CallingConv::Aelys) { + // Fat pointer { fn_ptr, env_ptr }: two pointers. + // C/Rust FnPtrs are bare pointers (single ptr, 8 bytes). + Ok((16, 8)) + } else { + Ok((8, 8)) + } + } + AirType::Str => Ok((16, 8)), + AirType::Void => Ok((0, 1)), + AirType::Slice(_) => Ok((16, 8)), + AirType::Array(inner, n) => { + let (size, align) = self.type_size_align(inner)?; + Ok((size.saturating_mul(*n as u32), align)) + } + AirType::Enum(name) => { + // Look up pre-computed enum size from AIR layout pass. + // Data enums are larger than 4 bytes (tag + payload). + if let Some(layout) = self.program.struct_sizes.get(name.as_str()) { + Ok((layout.size, layout.align)) + } else { + // Simple enum (no data variants): just the i32 tag + Ok((4, 4)) + } + } + AirType::Struct(name) => { + let def = self + .program + .structs + .iter() + .find(|s| s.name == *name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown struct {}", name)) + })?; + if def.fields.is_empty() { + return Ok((0, 1)); + } + // Codegen must never compute layout, that's AIR's job. 
+ // If offsets are missing, the AIR is just malformed + if !def.fields.iter().all(|f| f.offset.is_some()) { + return Err(CodegenError::UnsupportedType(format!( + "struct `{}` has uncomputed field offsets; AIR layout pass was not run", + name + ))); + } + let mut max_align = 1u32; + let mut end = 0u32; + for field in &def.fields { + let (fs, fa) = self.type_size_align(&field.ty)?; + max_align = max_align.max(fa); + end = end.max( + field + .offset + .expect("offset checked above") + .saturating_add(fs), + ); + } + Ok((align_to(end, max_align), max_align)) + } + AirType::Param(id) => Err(CodegenError::UnsupportedType(format!( + "unexpected unresolved type parameter {:?}", + id + ))), + AirType::Opaque => Err(CodegenError::UnsupportedType( + "unresolved Dynamic type reached codegen".to_string(), + )), + } + } +} + +fn align_to(offset: u32, align: u32) -> u32 { + if align == 0 { + return offset; + } + (offset + align - 1) & !(align - 1) +} diff --git a/codegen/src/lowering/mod.rs b/codegen/src/lowering/mod.rs new file mode 100644 index 0000000..3056d58 --- /dev/null +++ b/codegen/src/lowering/mod.rs @@ -0,0 +1,14 @@ +pub(crate) mod body; +pub(crate) mod calls; +pub(crate) mod casts; +pub(crate) mod functions; +pub(crate) mod globals; +pub(crate) mod memory; +pub(crate) mod operands; +pub(crate) mod ops; +pub(crate) mod runtime; +pub(crate) mod rvalues; +pub(crate) mod stmts; +pub(crate) mod strings; +pub(crate) mod structs; +pub(crate) mod terminators; diff --git a/codegen/src/lowering/operands.rs b/codegen/src/lowering/operands.rs new file mode 100644 index 0000000..6330aea --- /dev/null +++ b/codegen/src/lowering/operands.rs @@ -0,0 +1,155 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::lowering::functions::function_symbol_name; +use crate::types::air_basic_type_to_llvm; +use aelys_air::{AirConst, AirFloatSize, AirIntSize, AirType, Operand}; +use inkwell::AddressSpace; +use inkwell::context::Context; +use 
inkwell::types::{BasicTypeEnum, IntType}; +use inkwell::values::BasicValueEnum; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_operand( + &mut self, + operand: &Operand, + ) -> Result, CodegenError> { + match operand { + Operand::Copy(id) | Operand::Move(id) => self.load_local(*id), + Operand::Const(constant) => self.generate_const(constant), + } + } + + fn generate_const( + &mut self, + constant: &AirConst, + ) -> Result, CodegenError> { + match constant { + AirConst::IntLiteral(v) => { + Ok(self.context.i64_type().const_int(*v as u64, true).into()) + } + AirConst::Int(v, size) => { + let int_ty = int_type_for_size(self.context, *size); + Ok(int_ty + .const_int(*v as u64, is_signed_int_size(*size)) + .into()) + } + AirConst::Float(v, AirFloatSize::F32) => { + Ok(self.context.f32_type().const_float(*v).into()) + } + AirConst::Float(v, AirFloatSize::F64) => { + Ok(self.context.f64_type().const_float(*v).into()) + } + AirConst::Bool(v) => Ok(self + .context + .bool_type() + .const_int(u64::from(*v), false) + .into()), + AirConst::Str(s) => self.global_string_value(s), + AirConst::Null => Ok(self + .context + .ptr_type(AddressSpace::default()) + .const_null() + .into()), + AirConst::FnRef(name) => { + // FnRef keeps the source-level function name, so resolve it through + // the same symbol mapping as direct calls before touching LLVM. + let symbol_name = self + .program + .functions + .iter() + .find(|function| function.name == *name) + .map(function_symbol_name) + .unwrap_or_else(|| name.clone()); + let func = self.module.get_function(&symbol_name).ok_or_else(|| { + CodegenError::LlvmError(format!("fnref: unknown function '{}'", name)) + })?; + Ok(func.as_global_value().as_pointer_value().into()) + } + AirConst::Enum { .. } | AirConst::Array(_) | AirConst::Struct { .. 
} => { + Err(CodegenError::UnsupportedInstruction( + "enum/array/struct constants are only supported in global initializers" + .to_string(), + )) + } + AirConst::ZeroInit(ty) => Ok(air_basic_type_to_llvm(ty, self.context)?.const_zero()), + AirConst::Undef(ty) => { + let llvm_ty = air_basic_type_to_llvm(ty, self.context)?; + Ok(match llvm_ty { + BasicTypeEnum::IntType(t) => t.get_undef().into(), + BasicTypeEnum::FloatType(t) => t.get_undef().into(), + BasicTypeEnum::PointerType(t) => t.get_undef().into(), + BasicTypeEnum::StructType(t) => t.get_undef().into(), + BasicTypeEnum::ArrayType(t) => t.get_undef().into(), + BasicTypeEnum::VectorType(t) => t.get_undef().into(), + BasicTypeEnum::ScalableVectorType(t) => t.get_undef().into(), + }) + } + } + } + + pub(crate) fn operand_type(&self, operand: &Operand) -> Result { + match operand { + Operand::Copy(id) | Operand::Move(id) => Ok(self.local_air_type(*id)?.clone()), + Operand::Const(AirConst::IntLiteral(_)) => Ok(AirType::I64), + Operand::Const(AirConst::Int(_, size)) => Ok(int_type_from_size(*size)), + Operand::Const(AirConst::Float(_, AirFloatSize::F32)) => Ok(AirType::F32), + Operand::Const(AirConst::Float(_, AirFloatSize::F64)) => Ok(AirType::F64), + Operand::Const(AirConst::Bool(_)) => Ok(AirType::Bool), + Operand::Const(AirConst::Str(_)) => Ok(AirType::Str), + Operand::Const(AirConst::Null) => Ok(AirType::Ptr(Box::new(AirType::Void))), + Operand::Const(AirConst::FnRef(_)) => Ok(AirType::Ptr(Box::new(AirType::Void))), + Operand::Const(AirConst::Enum { enum_name, .. }) => Ok(AirType::Enum(enum_name.clone())), + Operand::Const(AirConst::ZeroInit(ty)) | Operand::Const(AirConst::Undef(ty)) => { + Ok(ty.clone()) + } + Operand::Const(AirConst::Array(_)) => Ok(AirType::Opaque), + Operand::Const(AirConst::Struct { name, .. 
}) => Ok(AirType::Struct(name.clone())), + } + } +} + +pub(crate) fn int_type_for_size(context: &'static Context, size: AirIntSize) -> IntType<'static> { + match size { + AirIntSize::I8 | AirIntSize::U8 => context.i8_type(), + AirIntSize::I16 | AirIntSize::U16 => context.i16_type(), + AirIntSize::I32 | AirIntSize::U32 => context.i32_type(), + AirIntSize::I64 | AirIntSize::U64 => context.i64_type(), + } +} + +pub(crate) fn int_type_from_size(size: AirIntSize) -> AirType { + match size { + AirIntSize::I8 => AirType::I8, + AirIntSize::I16 => AirType::I16, + AirIntSize::I32 => AirType::I32, + AirIntSize::I64 => AirType::I64, + AirIntSize::U8 => AirType::U8, + AirIntSize::U16 => AirType::U16, + AirIntSize::U32 => AirType::U32, + AirIntSize::U64 => AirType::U64, + } +} + +pub(crate) fn is_signed_int_size(size: AirIntSize) -> bool { + matches!( + size, + AirIntSize::I8 | AirIntSize::I16 | AirIntSize::I32 | AirIntSize::I64 + ) +} + +pub(crate) fn constant_kind_name(c: &AirConst) -> &'static str { + match c { + AirConst::IntLiteral(_) => "IntLiteral", + AirConst::Int(_, _) => "Int", + AirConst::Float(_, _) => "Float", + AirConst::Bool(_) => "Bool", + AirConst::Str(_) => "Str", + AirConst::Null => "Null", + AirConst::FnRef(_) => "FnRef", + AirConst::Enum { .. } => "Enum", + AirConst::ZeroInit(_) => "ZeroInit", + AirConst::Undef(_) => "Undef", + AirConst::Array(_) => "Array", + AirConst::Struct { .. 
} => "Struct", + } +} diff --git a/codegen/src/lowering/ops.rs b/codegen/src/lowering/ops.rs new file mode 100644 index 0000000..c29b2cc --- /dev/null +++ b/codegen/src/lowering/ops.rs @@ -0,0 +1,370 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use aelys_air::{AirType, BinOp, Operand, UnOp}; +use inkwell::values::{BasicValueEnum, FloatValue, IntValue}; +use inkwell::{FloatPredicate, IntPredicate}; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_binary_op( + &mut self, + op: &BinOp, + left: &Operand, + right: &Operand, + ) -> Result, CodegenError> { + let left_val = self.generate_operand(left)?; + let right_val = self.generate_operand(right)?; + + if left_val.is_int_value() && right_val.is_int_value() { + let left_ty = self.operand_type(left)?; + return self.generate_int_binary_op( + op.clone(), + left_val.into_int_value(), + right_val.into_int_value(), + &left_ty, + ); + } + + if left_val.is_float_value() && right_val.is_float_value() { + return self.generate_float_binary_op( + op.clone(), + left_val.into_float_value(), + right_val.into_float_value(), + ); + } + + let left_ty = self.operand_type(left)?; + if matches!(left_ty, AirType::Str) { + let right_ty = self.operand_type(right)?; + if matches!(right_ty, AirType::Str) { + return self.generate_string_binary_op( + op.clone(), + left_val.into_struct_value(), + right_val.into_struct_value(), + ); + } + } + + let left_ty = self.operand_type(left)?; + if matches!(left_ty, AirType::Enum(_)) { + return Err(CodegenError::UnsupportedInstruction(format!( + "comparison on data enum type `{:?}` is not supported; use `match` instead", + left_ty + ))); + } + Err(CodegenError::UnsupportedInstruction( + "binary op with non int/float operands".to_string(), + )) + } + + fn generate_int_binary_op( + &mut self, + op: BinOp, + left: IntValue<'static>, + right: IntValue<'static>, + operand_ty: &AirType, + ) -> Result, CodegenError> { + let is_unsigned = matches!( + operand_ty, + AirType::U8 | 
AirType::U16 | AirType::U32 | AirType::U64 + ); + + let value = match op { + BinOp::Add => self + .builder + .build_int_add(left, right, "iadd") + .map(Into::into), + BinOp::Sub => self + .builder + .build_int_sub(left, right, "isub") + .map(Into::into), + BinOp::Mul => self + .builder + .build_int_mul(left, right, "imul") + .map(Into::into), + BinOp::Div => { + self.emit_div_zero_check(right)?; + if is_unsigned { + self.builder + .build_int_unsigned_div(left, right, "iudiv") + .map(Into::into) + } else { + self.builder + .build_int_signed_div(left, right, "isdiv") + .map(Into::into) + } + } + BinOp::Rem => { + self.emit_div_zero_check(right)?; + if is_unsigned { + self.builder + .build_int_unsigned_rem(left, right, "iurem") + .map(Into::into) + } else { + self.builder + .build_int_signed_rem(left, right, "isrem") + .map(Into::into) + } + } + BinOp::Eq => self + .builder + .build_int_compare(IntPredicate::EQ, left, right, "icmp_eq") + .map(Into::into), + BinOp::Ne => self + .builder + .build_int_compare(IntPredicate::NE, left, right, "icmp_ne") + .map(Into::into), + BinOp::Lt => self + .builder + .build_int_compare( + if is_unsigned { + IntPredicate::ULT + } else { + IntPredicate::SLT + }, + left, + right, + "icmp_lt", + ) + .map(Into::into), + BinOp::Le => self + .builder + .build_int_compare( + if is_unsigned { + IntPredicate::ULE + } else { + IntPredicate::SLE + }, + left, + right, + "icmp_le", + ) + .map(Into::into), + BinOp::Gt => self + .builder + .build_int_compare( + if is_unsigned { + IntPredicate::UGT + } else { + IntPredicate::SGT + }, + left, + right, + "icmp_gt", + ) + .map(Into::into), + BinOp::Ge => self + .builder + .build_int_compare( + if is_unsigned { + IntPredicate::UGE + } else { + IntPredicate::SGE + }, + left, + right, + "icmp_ge", + ) + .map(Into::into), + BinOp::And | BinOp::BitAnd => { + self.builder.build_and(left, right, "iand").map(Into::into) + } + BinOp::Or | BinOp::BitOr => self.builder.build_or(left, right, "ior").map(Into::into), + 
BinOp::BitXor => self.builder.build_xor(left, right, "ixor").map(Into::into), + BinOp::Shl => { + // Mask shift amount to prevent LLVM UB (like Rust: amount % bitwidth) + let bitwidth = left.get_type().get_bit_width(); + let mask = left.get_type().const_int((bitwidth - 1) as u64, false); + let masked = self.builder.build_and(right, mask, "shl_mask") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_left_shift(left, masked, "ishl") + .map(Into::into) + } + BinOp::Shr => { + let bitwidth = left.get_type().get_bit_width(); + let mask = left.get_type().const_int((bitwidth - 1) as u64, false); + let masked = self.builder.build_and(right, mask, "shr_mask") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_right_shift(left, masked, !is_unsigned, "ishr") + .map(Into::into) + } + BinOp::CheckedAdd => { + return Err(self.unsupported_air( + "BinOp::CheckedAdd", + "checked integer add is not implemented for LLVM backend", + )); + } + BinOp::CheckedSub => { + return Err(self.unsupported_air( + "BinOp::CheckedSub", + "checked integer sub is not implemented for LLVM backend", + )); + } + BinOp::CheckedMul => { + return Err(self.unsupported_air( + "BinOp::CheckedMul", + "checked integer mul is not implemented for LLVM backend", + )); + } + }; + + value.map_err(|e| CodegenError::LlvmError(e.to_string())) + } + + fn generate_float_binary_op( + &mut self, + op: BinOp, + left: FloatValue<'static>, + right: FloatValue<'static>, + ) -> Result, CodegenError> { + let value = match op { + BinOp::Add => self + .builder + .build_float_add(left, right, "fadd") + .map(Into::into), + BinOp::Sub => self + .builder + .build_float_sub(left, right, "fsub") + .map(Into::into), + BinOp::Mul => self + .builder + .build_float_mul(left, right, "fmul") + .map(Into::into), + BinOp::Div => self + .builder + .build_float_div(left, right, "fdiv") + .map(Into::into), + BinOp::Rem => self + .builder + .build_float_rem(left, right, "frem") + 
.map(Into::into), + BinOp::Eq => self + .builder + .build_float_compare(FloatPredicate::OEQ, left, right, "fcmp_eq") + .map(Into::into), + BinOp::Ne => self + .builder + .build_float_compare(FloatPredicate::ONE, left, right, "fcmp_ne") + .map(Into::into), + BinOp::Lt => self + .builder + .build_float_compare(FloatPredicate::OLT, left, right, "fcmp_lt") + .map(Into::into), + BinOp::Le => self + .builder + .build_float_compare(FloatPredicate::OLE, left, right, "fcmp_le") + .map(Into::into), + BinOp::Gt => self + .builder + .build_float_compare(FloatPredicate::OGT, left, right, "fcmp_gt") + .map(Into::into), + BinOp::Ge => self + .builder + .build_float_compare(FloatPredicate::OGE, left, right, "fcmp_ge") + .map(Into::into), + _ => { + return Err(CodegenError::UnsupportedInstruction( + "unsupported float binop".to_string(), + )); + } + }; + + value.map_err(|e| CodegenError::LlvmError(e.to_string())) + } + + pub(crate) fn generate_unary_op( + &mut self, + op: &UnOp, + operand: &Operand, + ) -> Result, CodegenError> { + let value = self.generate_operand(operand)?; + + match op { + UnOp::Neg => { + if value.is_int_value() { + return self + .builder + .build_int_neg(value.into_int_value(), "ineg") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + + if value.is_float_value() { + return self + .builder + .build_float_neg(value.into_float_value(), "fneg") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + } + UnOp::Not => { + if matches!(self.operand_type(operand)?, AirType::Bool) { + return self + .builder + .build_not(value.into_int_value(), "not") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + } + UnOp::BitNot => { + if value.is_int_value() { + return self + .builder + .build_not(value.into_int_value(), "bitnot") + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())); + } + } + } + + Err(CodegenError::UnsupportedInstruction( + "unsupported unary 
op".to_string(), + )) + } + + fn generate_string_binary_op( + &mut self, + op: BinOp, + left: inkwell::values::StructValue<'static>, + right: inkwell::values::StructValue<'static>, + ) -> Result, CodegenError> { + match op { + BinOp::Eq | BinOp::Ne => { + // extract ptr/len from both string structs (flat ABI) + let (a_ptr, a_len) = self.string_parts_from_value(left)?; + let (b_ptr, b_len) = self.string_parts_from_value(right)?; + + let str_eq_fn = self.ensure_str_eq_function(); + let result = self + .builder + .build_call( + str_eq_fn, + &[a_ptr.into(), a_len.into(), b_ptr.into(), b_len.into()], + "str_eq", + ) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let i64_val = result + .try_as_basic_value() + .basic() + .ok_or_else(|| CodegenError::LlvmError("str_eq returned void".to_string()))? + .into_int_value(); + + let is_eq = matches!(op, BinOp::Eq); + let pred = if is_eq { + IntPredicate::NE + } else { + IntPredicate::EQ + }; + let name = if is_eq { "str_eq" } else { "str_ne" }; + self.builder + .build_int_compare(pred, i64_val, self.context.i64_type().const_zero(), name) + .map(Into::into) + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + _ => Err(CodegenError::UnsupportedInstruction( + "unsupported string binary op (only == and != are supported)".to_string(), + )), + } + } +} diff --git a/codegen/src/lowering/runtime.rs b/codegen/src/lowering/runtime.rs new file mode 100644 index 0000000..4c600f3 --- /dev/null +++ b/codegen/src/lowering/runtime.rs @@ -0,0 +1,217 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::aelys_string_type; +use inkwell::AddressSpace; +use inkwell::IntPredicate; +use inkwell::values::{FunctionValue, IntValue}; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn ensure_alloc_function(&self) -> FunctionValue<'static> { + if let Some(function) = self.module.get_function("__aelys_alloc") { + return function; + } + + let fn_ty = self + .context + .ptr_type(AddressSpace::default()) 
+ .fn_type(&[self.context.i64_type().into()], false); + self.module.add_function("__aelys_alloc", fn_ty, None) + } + + pub(crate) fn ensure_free_function(&self) -> FunctionValue<'static> { + if let Some(function) = self.module.get_function("__aelys_free") { + return function; + } + + let fn_ty = self.context.void_type().fn_type( + &[self.context.ptr_type(AddressSpace::default()).into()], + false, + ); + self.module.add_function("__aelys_free", fn_ty, None) + } + + pub(crate) fn ensure_write_function(&self) -> FunctionValue<'static> { + if let Some(function) = self.module.get_function("__aelys_write") { + return function; + } + + let fn_ty = self.context.void_type().fn_type( + &[ + self.context.ptr_type(AddressSpace::default()).into(), + self.context.i64_type().into(), + ], + false, + ); + self.module.add_function("__aelys_write", fn_ty, None) + } + + pub(crate) fn ensure_panic_function(&self) -> FunctionValue<'static> { + if let Some(function) = self.module.get_function("__aelys_panic") { + return function; + } + + let fn_ty = self.context.void_type().fn_type( + &[ + self.context.ptr_type(AddressSpace::default()).into(), + self.context.i64_type().into(), + ], + false, + ); + // Trivial optimization, LLVM declaration doesn't add `noreturn` attribute, so it basically + // can't optimize based on the fact that panic never returns + // yeah I be fixing up the most useless stuff possible + let function = self.module.add_function("__aelys_panic", fn_ty, None); + let noreturn_id = inkwell::attributes::Attribute::get_named_enum_kind_id("noreturn"); + let noreturn_attr = self.context.create_enum_attribute(noreturn_id, 0); + function.add_attribute(inkwell::attributes::AttributeLoc::Function, noreturn_attr); + function + } + + // sret-returning runtime functions (return %__aelys_string) + + // on windows x64 MSVC, struct returns use sret (first param = ptr to result slot) + // because LLVM and MSVC disagree on 16-byte struct lowering (ce7dd07) + + /// Helper: declare a 
runtime function that returns %__aelys_string. + /// Handles the windows sret ABI automatically. + fn declare_string_returning_fn( + &self, + name: &str, + params: &[inkwell::types::BasicMetadataTypeEnum<'static>], + ) -> FunctionValue<'static> { + if let Some(f) = self.module.get_function(name) { + return f; + } + let string_ty = aelys_string_type(self.context); + let use_sret = self.target_is_windows(); + let fn_ty = if use_sret { + let mut all_params = vec![self.context.ptr_type(AddressSpace::default()).into()]; + all_params.extend_from_slice(params); + self.context.void_type().fn_type(&all_params, false) + } else { + string_ty.fn_type(params, false) + }; + let function = self.module.add_function(name, fn_ty, None); + if use_sret { + use inkwell::attributes::AttributeLoc; + let sret_attr = self.context.create_type_attribute( + inkwell::attributes::Attribute::get_named_enum_kind_id("sret"), + string_ty.into(), + ); + function.add_attribute(AttributeLoc::Param(0), sret_attr); + } + function + } + + pub(crate) fn ensure_str_char_at_function(&self) -> FunctionValue<'static> { + let ptr_ty = self.context.ptr_type(AddressSpace::default()).into(); + let i64_ty = self.context.i64_type().into(); + self.declare_string_returning_fn("__aelys_str_char_at", &[ptr_ty, i64_ty, i64_ty]) + } + + pub(crate) fn ensure_to_string_i64_function(&self) -> FunctionValue<'static> { + self.declare_string_returning_fn("__aelys_to_string_i64", &[self.context.i64_type().into()]) + } + + pub(crate) fn ensure_to_string_f64_function(&self) -> FunctionValue<'static> { + self.declare_string_returning_fn("__aelys_to_string_f64", &[self.context.f64_type().into()]) + } + + pub(crate) fn ensure_to_string_bool_function(&self) -> FunctionValue<'static> { + // bool is passed as i64 (0 or 1) to the C runtime + self.declare_string_returning_fn( + "__aelys_to_string_bool", + &[self.context.i64_type().into()], + ) + } + + /// `__aelys_str_eq(ptr, i64, ptr, i64) -> i64` + /// flat ABI: (a_ptr, a_len, b_ptr, 
b_len) to avoid struct passing on windows x64 + pub(crate) fn ensure_str_eq_function(&self) -> FunctionValue<'static> { + if let Some(function) = self.module.get_function("__aelys_str_eq") { + return function; + } + + let ptr_ty = self.context.ptr_type(AddressSpace::default()).into(); + let i64_ty = self.context.i64_type().into(); + let fn_ty = self + .context + .i64_type() + .fn_type(&[ptr_ty, i64_ty, ptr_ty, i64_ty], false); + self.module.add_function("__aelys_str_eq", fn_ty, None) + } + + /// Emit a division-by-zero check: if `divisor == 0`, branch to a panic + /// block; otherwise continue in a new `div_ok` block. + pub(crate) fn emit_div_zero_check( + &mut self, + divisor: IntValue<'static>, + ) -> Result<(), CodegenError> { + let is_zero = self + .builder + .build_int_compare( + IntPredicate::EQ, + divisor, + divisor.get_type().const_zero(), + "div_zero_cmp", + ) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + let current_fn = self.function; + let trap_block = self.context.append_basic_block(current_fn, "div_zero"); + let ok_block = self.context.append_basic_block(current_fn, "div_ok"); + + self.builder + .build_conditional_branch(is_zero, trap_block, ok_block) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + self.builder.position_at_end(trap_block); + let panic_fn = self.ensure_panic_function(); + let (msg_ptr, msg_len) = self.global_string_ptr_len("division by zero")?; + let msg_len_val = self.context.i64_type().const_int(msg_len, false); + self.builder + .build_call(panic_fn, &[msg_ptr.into(), msg_len_val.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_unreachable() + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + self.builder.position_at_end(ok_block); + Ok(()) + } + + /// Emit a bounds check: if `index >= length` (unsigned), branch to a panic + /// block; otherwise continue in a new `idx_ok` block. 
+ pub(crate) fn emit_bounds_check( + &mut self, + index: IntValue<'static>, + length: IntValue<'static>, + ) -> Result<(), CodegenError> { + let oob = self + .builder + .build_int_compare(IntPredicate::UGE, index, length, "idx_oob_cmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + let current_fn = self.function; + let oob_block = self.context.append_basic_block(current_fn, "idx_oob"); + let ok_block = self.context.append_basic_block(current_fn, "idx_ok"); + + self.builder + .build_conditional_branch(oob, oob_block, ok_block) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + self.builder.position_at_end(oob_block); + let panic_fn = self.ensure_panic_function(); + let (msg_ptr, msg_len) = self.global_string_ptr_len("index out of bounds")?; + let msg_len_val = self.context.i64_type().const_int(msg_len, false); + self.builder + .build_call(panic_fn, &[msg_ptr.into(), msg_len_val.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_unreachable() + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + self.builder.position_at_end(ok_block); + Ok(()) + } +} diff --git a/codegen/src/lowering/rvalues.rs b/codegen/src/lowering/rvalues.rs new file mode 100644 index 0000000..7db8e98 --- /dev/null +++ b/codegen/src/lowering/rvalues.rs @@ -0,0 +1,469 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::lowering::functions::function_symbol_name; +use crate::types::{air_basic_type_to_llvm, alignment_of, closure_fat_ptr_type}; +use aelys_air::layout::{enum_has_data, enum_max_payload_size}; +use aelys_air::{AirType, Operand, Rvalue}; +use inkwell::values::{BasicValue, BasicValueEnum}; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_rvalue( + &mut self, + rvalue: &Rvalue, + expected_ty: Option<&AirType>, + ) -> Result, CodegenError> { + match rvalue { + Rvalue::Use(operand) => self.generate_operand(operand), + Rvalue::BinaryOp(op, left, right) => 
self.generate_binary_op(op, left, right), + Rvalue::UnaryOp(op, operand) => self.generate_unary_op(op, operand), + Rvalue::Call { func, args } => { + self.generate_call(func, args, expected_ty)?.ok_or_else(|| { + CodegenError::LlvmError("call used as value returned void".to_string()) + }) + } + Rvalue::StructInit { name, fields } => self.generate_struct_init(name, fields), + Rvalue::FieldAccess { base, field } => self.generate_field_access(base, field), + Rvalue::AddressOf(local) => Ok(self.lookup_local_ptr(*local)?.as_basic_value_enum()), + Rvalue::Deref(operand) => { + let ptr = self.generate_operand(operand)?.into_pointer_value(); + let inner = match self.operand_type(operand)? { + AirType::Ptr(inner) => *inner, + other => { + return Err(CodegenError::UnsupportedType(format!( + "cannot dereference operand of type {:?}", + other + ))); + } + }; + let inner_ty = air_basic_type_to_llvm(&inner, self.context)?; + self.load_value(inner_ty, ptr, "deref") + } + Rvalue::Cast { operand, from, to } => self.generate_cast(operand, from, to), + Rvalue::Index { base, index } => self.generate_index(base, index), + Rvalue::EnumInit { + enum_name, + tag, + payload, + .. 
+ } => self.generate_enum_init(enum_name, *tag, payload), + Rvalue::EnumTag { enum_name, operand } => self.generate_enum_tag(enum_name, operand), + Rvalue::EnumPayload { + enum_name, + tag, + operand, + field_index, + } => self.generate_enum_payload(enum_name, *tag, operand, *field_index), + Rvalue::ClosureCreate { fn_name, env } => { + self.generate_closure_create(fn_name, env) + } + } + } + + fn generate_index( + &mut self, + base: &Operand, + index: &Operand, + ) -> Result, CodegenError> { + let idx_val = self.generate_operand(index)?.into_int_value(); + let base_ty = self.operand_type(base)?; + + match base_ty { + AirType::Array(ref inner, n) => { + let length = self.context.i64_type().const_int(n, false); + self.emit_bounds_check(idx_val, length)?; + + let base_local = match base { + Operand::Copy(id) | Operand::Move(id) => *id, + _ => { + return Err(CodegenError::LlvmError( + "array index base must be a local".to_string(), + )); + } + }; + let arr_ty = air_basic_type_to_llvm(&base_ty, self.context)?; + let ptr = self.lookup_local_ptr(base_local)?; + let zero = self.context.i64_type().const_zero(); + let elem_ptr = unsafe { + self.builder + .build_in_bounds_gep(arr_ty, ptr, &[zero, idx_val], "idx_elem_ptr") + } + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let elem_ty = air_basic_type_to_llvm(inner, self.context)?; + self.load_value(elem_ty, elem_ptr, "idx_load") + } + AirType::Slice(ref inner) => { + let slice_val = self.generate_operand(base)?.into_struct_value(); + let data_ptr = self + .builder + .build_extract_value(slice_val, 0, "slice_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_pointer_value(); + let length = self + .builder + .build_extract_value(slice_val, 1, "slice_len") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? 
+ .into_int_value(); + self.emit_bounds_check(idx_val, length)?; + let elem_ty = air_basic_type_to_llvm(inner, self.context)?; + let elem_ptr = unsafe { + self.builder + .build_in_bounds_gep(elem_ty, data_ptr, &[idx_val], "idx_elem_ptr") + } + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.load_value(elem_ty, elem_ptr, "idx_load") + } + AirType::Str => { + // UTF-8 char indexing, runtime handles multi-byte scanning + let str_val = self.generate_operand(base)?.into_struct_value(); + let (str_ptr, str_len) = self.string_parts_from_value(str_val)?; + let char_at_fn = self.ensure_str_char_at_function(); + self.call_sret_returning_fn( + char_at_fn, + &[str_ptr.into(), str_len.into(), idx_val.into()], + "str_char_at", + ) + } + other => Err(CodegenError::UnsupportedType(format!( + "cannot index into {:?}", + other + ))), + } + } + + fn generate_enum_init( + &mut self, + enum_name: &str, + tag: u32, + payload: &[Operand], + ) -> Result, CodegenError> { + // Look up the enum def to decide simple vs data + let enum_def = self.program.enums.iter().find(|e| e.name == enum_name); + + let is_data_enum = enum_def.is_some_and(|d| enum_has_data(d)); + + if !is_data_enum || payload.is_empty() { + // Simple enum or unit variant of a data enum: still need to produce + // the right type. For data enums, we must produce a { i32, [N x i8] } value. 
+ if is_data_enum { + let def = enum_def.unwrap(); + let max_payload = enum_max_payload_size(def, &self.program.struct_sizes); + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown enum struct type: {}", + enum_struct_name + )) + })?; + + let tmp = self + .builder + .build_alloca(enum_ty, "enum_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, enum_ty.into())?; + + // Store the tag + let tag_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 0, "enum_tag_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let tag_val = self.context.i32_type().const_int(tag as u64, false); + self.store_value(tag_ptr, tag_val.into())?; + + // Zero-init the payload area + if max_payload > 0 { + let payload_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 1, "enum_payload_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let payload_arr_ty = self.context.i8_type().array_type(max_payload); + let zero = payload_arr_ty.const_zero(); + self.store_value(payload_ptr, zero.into())?; + } + + self.load_value(enum_ty.into(), tmp, "enum_value") + } else { + // Pure simple enum: just an i32 tag + Ok(self.context.i32_type().const_int(tag as u64, false).into()) + } + } else { + // Data variant construction: build { i32 tag, [N x i8] payload } + let def = enum_def.unwrap(); + let max_payload = enum_max_payload_size(def, &self.program.struct_sizes); + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown enum struct type: {}", + enum_struct_name + )) + })?; + + let tmp = self + .builder + .build_alloca(enum_ty, "enum_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, enum_ty.into())?; + + // 
Store the tag at index 0 + let tag_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 0, "enum_tag_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let tag_val = self.context.i32_type().const_int(tag as u64, false); + self.store_value(tag_ptr, tag_val.into())?; + + // Zero-init the full payload area first so trailing bytes are clean + if max_payload > 0 { + let payload_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 1, "enum_payload_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let payload_arr_ty = self.context.i8_type().array_type(max_payload); + let zero = payload_arr_ty.const_zero(); + self.store_value(payload_ptr, zero.into())?; + } + + // Store each payload field at the right offset within the byte array + let payload_base_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 1, "enum_payload_base") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + // Find the variant definition to get the field types + let variant_def = def.variants.iter().find(|v| v.tag == tag).ok_or_else(|| { + CodegenError::LlvmError(format!( + "unknown variant tag {} for enum {}", + tag, enum_name + )) + })?; + + let mut byte_offset: u32 = 0; + for (i, (operand, field_air_ty)) in + payload.iter().zip(variant_def.payload.iter()).enumerate() + { + let field_llvm_ty = air_basic_type_to_llvm(field_air_ty, self.context)?; + let field_layout = aelys_air::layout::resolved_layout(field_air_ty, &self.program.struct_sizes); + + // Align the offset + byte_offset = (byte_offset + field_layout.align - 1) & !(field_layout.align - 1); + + // GEP into the byte array at the current offset, then bitcast to field type pointer + let offset_val = self.context.i32_type().const_int(byte_offset as u64, false); + let field_ptr = unsafe { + self.builder.build_in_bounds_gep( + self.context.i8_type(), + payload_base_ptr, + &[offset_val], + &format!("enum_field_{}_ptr", i), + ) + } + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + let value = 
self.generate_operand(operand)?; + + // The payload byte array sits at struct offset 4 (after the i32 + // tag) inside the non-packed struct { i32, [N x i8] }. The struct + // alignment is 4, so the payload base is 4-byte aligned. We must + // not claim a higher alignment than the address actually has. + let field_align = alignment_of(field_llvm_ty).min(4); + let store = self + .builder + .build_store(field_ptr, value) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + store + .set_alignment(field_align) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + byte_offset += field_layout.size; + } + + self.load_value(enum_ty.into(), tmp, "enum_value") + } + } + + fn generate_enum_tag( + &mut self, + enum_name: &str, + operand: &Operand, + ) -> Result, CodegenError> { + let enum_def = self.program.enums.iter().find(|e| e.name == enum_name); + + let is_data_enum = enum_def.is_some_and(|d| enum_has_data(d)); + + if is_data_enum { + // Data enum: { i32 tag, [N x i8] payload } -- extract field 0 + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown enum struct type: {}", + enum_struct_name + )) + })?; + + // The operand is a struct value. We need it on the stack to GEP into it. 
+ let val = self.generate_operand(operand)?; + let tmp = self + .builder + .build_alloca(enum_ty, "match_enum_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, enum_ty.into())?; + self.store_value(tmp, val)?; + + let tag_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 0, "match_tag_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.load_value(self.context.i32_type().into(), tag_ptr, "match_tag") + } else { + // Simple enum: the value IS the i32 tag + self.generate_operand(operand) + } + } + + fn generate_enum_payload( + &mut self, + enum_name: &str, + tag: u32, + operand: &Operand, + field_index: u32, + ) -> Result, CodegenError> { + let def = self + .program + .enums + .iter() + .find(|e| e.name == enum_name) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown enum: {}", enum_name)))?; + + let variant_def = def.variants.iter().find(|v| v.tag == tag).ok_or_else(|| { + CodegenError::LlvmError(format!( + "unknown variant tag {} for enum {}", + tag, enum_name + )) + })?; + + if field_index as usize >= variant_def.payload.len() { + return Err(CodegenError::LlvmError(format!( + "field index {} out of range for variant (has {} fields)", + field_index, + variant_def.payload.len() + ))); + } + + let field_air_ty = &variant_def.payload[field_index as usize]; + let field_llvm_ty = air_basic_type_to_llvm(field_air_ty, self.context)?; + + let enum_struct_name = format!("__aelys_enum_{}", enum_name); + let enum_ty = self + .context + .get_struct_type(&enum_struct_name) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown enum struct type: {}", + enum_struct_name + )) + })?; + + // Store the operand on the stack so we can GEP into it + let val = self.generate_operand(operand)?; + let tmp = self + .builder + .build_alloca(enum_ty, "match_payload_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, enum_ty.into())?; + self.store_value(tmp, val)?; + + // 
GEP to the payload byte array (field 1 of the enum struct) + let payload_base_ptr = self + .builder + .build_struct_gep(enum_ty, tmp, 1, "match_payload_base") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + // Calculate the byte offset of the requested field, matching the layout used in EnumInit + let mut byte_offset: u32 = 0; + for i in 0..=field_index { + let ty = &variant_def.payload[i as usize]; + let layout = aelys_air::layout::resolved_layout(ty, &self.program.struct_sizes); + // Align before this field + byte_offset = (byte_offset + layout.align - 1) & !(layout.align - 1); + if i < field_index { + byte_offset += layout.size; + } + } + + // GEP into the byte array at the computed offset + let offset_val = self.context.i32_type().const_int(byte_offset as u64, false); + let field_ptr = unsafe { + self.builder.build_in_bounds_gep( + self.context.i8_type(), + payload_base_ptr, + &[offset_val], + &format!("match_field_{}_ptr", field_index), + ) + } + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + // The payload byte array sits at struct offset 4 — see comment in + // generate_enum_init for why alignment is capped at 4. + let field_align = alignment_of(field_llvm_ty).min(4); + let load = self + .builder + .build_load( + field_llvm_ty, + field_ptr, + &format!("match_field_{}", field_index), + ) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + load.as_instruction_value() + .unwrap() + .set_alignment(field_align) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(load) + } + + // Builds a fat pointer { fn_ptr, env_ptr } for a closure or a named + // function used as a value. env is either a heap-allocated env struct + // pointer (capturing closure) or null (non-capturing lambda, named fn). 
+ fn generate_closure_create( + &mut self, + fn_name: &str, + env: &Operand, + ) -> Result, CodegenError> { + let symbol_name = self + .program + .functions + .iter() + .find(|f| f.name == *fn_name) + .map(function_symbol_name) + .unwrap_or_else(|| fn_name.to_string()); + let func = self.module.get_function(&symbol_name).ok_or_else(|| { + CodegenError::LlvmError(format!("closure_create: unknown function '{}'", fn_name)) + })?; + let fn_ptr = func.as_global_value().as_pointer_value(); + + // generate the env operand (pointer or null) + let env_ptr = self.generate_operand(env)?; + + // Build { ptr fn_ptr, ptr env_ptr } struct + let fat_ty = closure_fat_ptr_type(self.context); + let mut fat = fat_ty.get_undef(); + fat = self + .builder + .build_insert_value(fat, fn_ptr, 0, "closure_fn") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_struct_value(); + fat = self + .builder + .build_insert_value(fat, env_ptr, 1, "closure_env") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? 
+ .into_struct_value(); + Ok(fat.into()) + } +} diff --git a/codegen/src/lowering/stmts.rs b/codegen/src/lowering/stmts.rs new file mode 100644 index 0000000..07c072f --- /dev/null +++ b/codegen/src/lowering/stmts.rs @@ -0,0 +1,217 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::air_basic_type_to_llvm; +use aelys_air::{AirStmtKind, AirType, Place}; +use inkwell::AddressSpace; +use inkwell::types::BasicTypeEnum; +use inkwell::values::PointerValue; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_stmt(&mut self, stmt: &AirStmtKind) -> Result<(), CodegenError> { + match stmt { + AirStmtKind::Assign { place, rvalue } => { + let expected_ty = self.place_type(place)?; + let value = self.generate_rvalue(rvalue, Some(&expected_ty))?; + match place { + Place::Local(local) => self.assign_local(*local, value), + _ => { + let ptr = self.place_ptr(place)?; + self.store_value(ptr, value) + } + } + } + AirStmtKind::CallVoid { func, args } => { + let _ = self.generate_call(func, args, None)?; + Ok(()) + } + AirStmtKind::GcAlloc { local, ty, .. } | AirStmtKind::Alloc { local, ty } => { + let alloc_fn = self.ensure_alloc_function(); + let size = self.air_type_size(ty)? as u64; + let size_value = self.context.i64_type().const_int(size, false); + let call = self + .builder + .build_call(alloc_fn, &[size_value.into()], "alloc_raw") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let raw_ptr = call + .try_as_basic_value() + .basic() + .ok_or_else(|| { + CodegenError::LlvmError("__aelys_alloc returned void".to_string()) + })? 
+ .into_pointer_value(); + + let local_ty = self.local_air_type(*local)?.clone(); + let target_ty = air_basic_type_to_llvm(&local_ty, self.context)?; + let target_ptr_ty = match target_ty { + BasicTypeEnum::PointerType(ptr) => ptr, + _ => { + return Err(CodegenError::UnsupportedType(format!( + "alloc destination local {} is not a pointer type", + local.0 + ))); + } + }; + + let casted = self + .builder + .build_pointer_cast(raw_ptr, target_ptr_ty, "alloc_cast") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.assign_local(*local, casted.into()) + } + AirStmtKind::Free(local) => { + let free_fn = self.ensure_free_function(); + let ptr_value = self.load_local(*local)?; + let ptr = ptr_value.into_pointer_value(); + let i8_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let casted = self + .builder + .build_pointer_cast(ptr, i8_ptr_ty, "free_cast") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_call(free_fn, &[casted.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirStmtKind::GcDrop(_) => Err(self.unsupported_air( + "AirStmtKind::GcDrop", + "gc_drop is not implemented for LLVM backend", + )), + AirStmtKind::ArenaCreate(_) => Err(self.unsupported_air( + "AirStmtKind::ArenaCreate", + "arena_create is not implemented for LLVM backend", + )), + AirStmtKind::ArenaDestroy(_) => Err(self.unsupported_air( + "AirStmtKind::ArenaDestroy", + "arena_destroy is not implemented for LLVM backend", + )), + AirStmtKind::MemoryFence(ordering) => Err(self.unsupported_air( + "AirStmtKind::MemoryFence", + format!("memory fence ordering {ordering:?} is not implemented"), + )), + } + } + + fn place_ptr(&mut self, place: &Place) -> Result, CodegenError> { + match place { + Place::Local(local) => self.lookup_local_ptr(*local), + Place::Field(local, field) => match self.local_air_type(*local)?.clone() { + AirType::Struct(name) => { + let struct_ty = self.context.get_struct_type(&name).ok_or_else(|| 
{ + CodegenError::UnsupportedType(format!("unknown struct {}", name)) + })?; + let ptr = self.lookup_local_ptr(*local)?; + let index = self.struct_field_index(&name, field)?; + self.builder + .build_struct_gep(struct_ty, ptr, index, "place_field") + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + AirType::Ptr(inner) => match inner.as_ref() { + AirType::Struct(name) => { + let struct_ty = self.context.get_struct_type(name).ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown struct {}", name)) + })?; + let base_ptr = self.load_local(*local)?.into_pointer_value(); + let index = self.struct_field_index(name, field)?; + self.builder + .build_struct_gep(struct_ty, base_ptr, index, "place_field") + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + _ => Err(CodegenError::UnsupportedType(format!( + "place field on non-struct pointer local {}", + local.0 + ))), + }, + _ => Err(CodegenError::UnsupportedType(format!( + "place field on non-struct local {}", + local.0 + ))), + }, + Place::Deref(local) => Ok(self.load_local(*local)?.into_pointer_value()), + Place::Index(local, index_op) => { + let idx_val = self.generate_operand(index_op)?.into_int_value(); + match self.local_air_type(*local)?.clone() { + AirType::Array(ref inner, n) => { + let length = self.context.i64_type().const_int(n, false); + self.emit_bounds_check(idx_val, length)?; + let arr_ty = air_basic_type_to_llvm( + &AirType::Array(inner.clone(), n), + self.context, + )?; + let ptr = self.lookup_local_ptr(*local)?; + let zero = self.context.i64_type().const_zero(); + unsafe { + self.builder + .build_in_bounds_gep(arr_ty, ptr, &[zero, idx_val], "idx_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + } + AirType::Slice(ref inner) => { + let slice_val = self.load_local(*local)?.into_struct_value(); + let data_ptr = self + .builder + .build_extract_value(slice_val, 0, "slice_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? 
+ .into_pointer_value(); + let length = self + .builder + .build_extract_value(slice_val, 1, "slice_len") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_int_value(); + self.emit_bounds_check(idx_val, length)?; + let elem_ty = air_basic_type_to_llvm(inner, self.context)?; + unsafe { + self.builder + .build_in_bounds_gep(elem_ty, data_ptr, &[idx_val], "idx_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } + } + other => Err(CodegenError::UnsupportedType(format!( + "cannot index into {:?}", + other + ))), + } + } + } + } + + fn place_type(&self, place: &Place) -> Result { + match place { + Place::Local(local) => Ok(self.local_air_type(*local)?.clone()), + Place::Field(local, field) => { + let struct_name = match self.local_air_type(*local)? { + AirType::Struct(name) => name.as_str(), + AirType::Ptr(inner) => match inner.as_ref() { + AirType::Struct(name) => name.as_str(), + _ => { + return Err(CodegenError::UnsupportedType(format!( + "field access on non-struct pointer local {}", + local.0 + ))); + } + }, + _ => { + return Err(CodegenError::UnsupportedType(format!( + "field access on non-struct local {}", + local.0 + ))); + } + }; + Ok(self.struct_field_type(struct_name, field)?.clone()) + } + Place::Deref(local) => match self.local_air_type(*local)? { + AirType::Ptr(inner) => Ok((**inner).clone()), + other => Err(CodegenError::UnsupportedType(format!( + "cannot dereference non-pointer place {:?}", + other + ))), + }, + Place::Index(local, _) => match self.local_air_type(*local)? 
{ + AirType::Array(inner, _) | AirType::Slice(inner) => Ok((**inner).clone()), + other => Err(CodegenError::UnsupportedType(format!( + "cannot index into {:?}", + other + ))), + }, + } + } +} diff --git a/codegen/src/lowering/strings.rs b/codegen/src/lowering/strings.rs new file mode 100644 index 0000000..65e80b1 --- /dev/null +++ b/codegen/src/lowering/strings.rs @@ -0,0 +1,166 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::aelys_string_type; +use inkwell::attributes::{Attribute, AttributeLoc}; +use inkwell::module::Linkage; +use inkwell::types::{AnyType, BasicTypeEnum}; +use inkwell::values::{ + BasicMetadataValueEnum, BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue, + StructValue, +}; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn add_sret_callsite_attr( + &self, + call: CallSiteValue<'static>, + ret_ty: BasicTypeEnum<'static>, + ) { + // Indirect calls do not inherit parameter attributes from a declaration, + // so stamp sret on the callsite itself whenever we materialize the hidden slot. 
+ let sret_attr = self.context.create_type_attribute( + Attribute::get_named_enum_kind_id("sret"), + ret_ty.as_any_type_enum(), + ); + call.add_attribute(AttributeLoc::Param(0), sret_attr); + } + + pub(crate) fn global_string_ptr_len( + &mut self, + text: &str, + ) -> Result<(PointerValue<'static>, u64), CodegenError> { + let i8_ty = self.context.i8_type(); + let text_len = u64::try_from(text.len()).map_err(|_| { + CodegenError::UnsupportedInstruction("string literal too large".to_string()) + })?; + let array_len = u32::try_from(text.len()) + .ok() + .and_then(|n| n.checked_add(1)) + .ok_or_else(|| { + CodegenError::UnsupportedInstruction("string literal too large".to_string()) + })?; + + let global_ptr = if let Some(existing) = self.string_globals.get(text).copied() { + existing + } else { + let name = format!("str_{}_{}", self.air_function.id.0, self.string_id); + self.string_id = self.string_id.saturating_add(1); + + let mut bytes = Vec::with_capacity(text.len() + 1); + for byte in text.as_bytes() { + bytes.push(i8_ty.const_int(u64::from(*byte), false)); + } + bytes.push(i8_ty.const_zero()); + + let global = self + .module + .add_global(i8_ty.array_type(array_len), None, &name); + global.set_linkage(Linkage::Private); + global.set_constant(true); + global.set_initializer(&i8_ty.const_array(&bytes)); + let ptr = global.as_pointer_value(); + self.string_globals.insert(text.to_string(), ptr); + ptr + }; + + let array_ty = i8_ty.array_type(array_len); + + let zero = self.context.i64_type().const_zero(); + let ptr = unsafe { + self.builder + .build_in_bounds_gep(array_ty, global_ptr, &[zero, zero], "str_ptr") + } + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + + Ok((ptr, text_len)) + } + + pub(crate) fn global_string_value( + &mut self, + text: &str, + ) -> Result, CodegenError> { + let (ptr, len) = self.global_string_ptr_len(text)?; + let string_ty = aelys_string_type(self.context); + let value = self + .builder + 
.build_insert_value(string_ty.get_undef(), ptr, 0, "str_init_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_struct_value(); + let value = self + .builder + .build_insert_value( + value, + self.context.i64_type().const_int(len, false), + 1, + "str_init_len", + ) + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_struct_value(); + Ok(value.into()) + } + + pub(crate) fn string_parts_from_value( + &self, + value: StructValue<'static>, + ) -> Result<(PointerValue<'static>, IntValue<'static>), CodegenError> { + let ptr = self + .builder + .build_extract_value(value, 0, "str_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_pointer_value(); + let len = self + .builder + .build_extract_value(value, 1, "str_len") + .map_err(|e| CodegenError::LlvmError(e.to_string()))? + .into_int_value(); + Ok((ptr, len)) + } + + /// Call a function that uses sret convention (struct return via pointer). + /// On Windows, alloca a result slot, pass as first arg, call, load result. + /// On other targets, call normally and extract the return value. 
+ pub(crate) fn call_with_sret( + &mut self, + fn_val: FunctionValue<'static>, + args: &[BasicMetadataValueEnum<'static>], + ret_ty: BasicTypeEnum<'static>, + name: &str, + ) -> Result, CodegenError> { + if self.target_is_windows() { + let result_ptr = self + .builder + .build_alloca(ret_ty, "sret_slot") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(result_ptr, ret_ty)?; + let mut all_args: Vec> = vec![result_ptr.into()]; + all_args.extend_from_slice(args); + let call = self + .builder + .build_call(fn_val, &all_args, "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(fn_val.get_call_conventions()); + self.add_sret_callsite_attr(call, ret_ty); + self.builder + .build_load(ret_ty, result_ptr, name) + .map_err(|e| CodegenError::LlvmError(e.to_string())) + } else { + let call = self + .builder + .build_call(fn_val, args, name) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + call.set_call_convention(fn_val.get_call_conventions()); + call.try_as_basic_value() + .basic() + .ok_or_else(|| CodegenError::LlvmError(format!("{} returned void", name))) + } + } + + /// Call a runtime function that returns %__aelys_string via sret. 
+ pub(crate) fn call_sret_returning_fn( + &mut self, + fn_val: FunctionValue<'static>, + args: &[BasicMetadataValueEnum<'static>], + name: &str, + ) -> Result, CodegenError> { + self.call_with_sret(fn_val, args, aelys_string_type(self.context).into(), name) + } +} diff --git a/codegen/src/lowering/structs.rs b/codegen/src/lowering/structs.rs new file mode 100644 index 0000000..5f767f3 --- /dev/null +++ b/codegen/src/lowering/structs.rs @@ -0,0 +1,216 @@ +use crate::CodegenContext; +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::types::air_basic_type_to_llvm; +use aelys_air::layout::{enum_has_data, enum_max_payload_size}; +use aelys_air::{AirProgram, AirType, Operand}; +use inkwell::types::StructType; +use inkwell::values::{BasicValueEnum, PointerValue}; + +impl CodegenContext { + pub(crate) fn declare_struct_types(&self, program: &AirProgram) -> Result<(), CodegenError> { + // Declare opaque struct types first (forward declarations) + for struct_def in &program.structs { + if self.context.get_struct_type(&struct_def.name).is_none() { + self.context.opaque_struct_type(&struct_def.name); + } + } + + // Declare named struct types for data enums BEFORE setting struct bodies, + // because struct fields may reference enum types and air_basic_type_to_llvm + // needs the enum LLVM type to be registered to return the correct type + // (otherwise it falls back to bare i32 instead of { i32, [N x i8] }). 
+ for enum_def in &program.enums { + if enum_has_data(enum_def) { + let max_payload = enum_max_payload_size(enum_def, &program.struct_sizes); + let enum_struct_name = format!("__aelys_enum_{}", enum_def.name); + let enum_ty = self.context.opaque_struct_type(&enum_struct_name); + let tag_ty = self.context.i32_type().into(); + let payload_ty = self.context.i8_type().array_type(max_payload).into(); + enum_ty.set_body(&[tag_ty, payload_ty], false); + } + } + + // Now set struct bodies — enum LLVM types are available for field resolution + for struct_def in &program.structs { + let llvm_struct = self + .context + .get_struct_type(&struct_def.name) + .ok_or_else(|| { + CodegenError::LlvmError(format!( + "failed to retrieve declared struct: {}", + struct_def.name + )) + })?; + + if llvm_struct.is_opaque() { + let mut field_types = Vec::with_capacity(struct_def.fields.len()); + for field in &struct_def.fields { + field_types.push(air_basic_type_to_llvm(&field.ty, self.context)?); + } + llvm_struct.set_body(&field_types, false); + } + } + + Ok(()) + } +} + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_struct_init( + &mut self, + name: &str, + fields: &[(String, Operand)], + ) -> Result, CodegenError> { + let struct_ty = self + .context + .get_struct_type(name) + .ok_or_else(|| CodegenError::UnsupportedType(format!("unknown struct {}", name)))?; + let tmp = self + .builder + .build_alloca(struct_ty, "struct_tmp") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.align_alloca(tmp, struct_ty.into())?; + + for (field_name, operand) in fields { + let index = self.struct_field_index(name, field_name)?; + let ptr = self + .builder + .build_struct_gep(struct_ty, tmp, index, "field_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let value = self.generate_operand(operand)?; + self.store_value(ptr, value)?; + } + + self.load_value(struct_ty.into(), tmp, "struct_value") + } + + pub(crate) fn generate_field_access( + &mut self, + base: &Operand, 
+ field: &str, + ) -> Result, CodegenError> { + if matches!(self.operand_type(base)?, AirType::Str) { + if field != "len" { + return Err(CodegenError::UnsupportedType(format!( + "unknown field `{}` on `Str`", + field + ))); + } + let str_value = self.generate_operand(base)?; + if !str_value.is_struct_value() { + return Err(CodegenError::UnsupportedType( + "expected Str fat pointer value for field access".to_string(), + )); + } + return Ok(self + .builder + .build_extract_value(str_value.into_struct_value(), 1, "str_len_extract") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?); + } + + if let Operand::Copy(local) | Operand::Move(local) = base { + if let AirType::Struct(name) = self.local_air_type(*local)?.clone() { + if !self.local_uses_alloca(*local) { + let index = self.struct_field_index(&name, field)?; + let struct_value = self.load_local(*local)?.into_struct_value(); + return Ok(self + .builder + .build_extract_value(struct_value, index, "field_extract") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?); + } + } + } + + let (struct_name, struct_ty, base_ptr) = self.struct_pointer_from_operand(base)?; + let index = self.struct_field_index(&struct_name, field)?; + let field_ty = self.struct_field_type(&struct_name, field)?; + let field_ptr = self + .builder + .build_struct_gep(struct_ty, base_ptr, index, "field_ptr") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + let llvm_field_ty = air_basic_type_to_llvm(field_ty, self.context)?; + self.load_value(llvm_field_ty, field_ptr, "field_load") + } + + pub(crate) fn struct_pointer_from_operand( + &mut self, + operand: &Operand, + ) -> Result<(String, StructType<'static>, PointerValue<'static>), CodegenError> { + match operand { + Operand::Copy(local) | Operand::Move(local) => match self.local_air_type(*local)? 
{ + AirType::Struct(name) => { + let ty = self.context.get_struct_type(name).ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown struct {}", name)) + })?; + Ok((name.clone(), ty, self.lookup_local_ptr(*local)?)) + } + AirType::Ptr(inner) => match inner.as_ref() { + AirType::Struct(name) => { + let ty = self.context.get_struct_type(name).ok_or_else(|| { + CodegenError::UnsupportedType(format!("unknown struct {}", name)) + })?; + Ok(( + name.clone(), + ty, + self.load_local(*local)?.into_pointer_value(), + )) + } + other => Err(CodegenError::UnsupportedType(format!( + "field access pointer to non-struct {:?}", + other + ))), + }, + other => Err(CodegenError::UnsupportedType(format!( + "field access on non-struct type {:?}", + other + ))), + }, + Operand::Const(_) => Err(CodegenError::UnsupportedInstruction( + "field access on const base is not supported".to_string(), + )), + } + } + + pub(crate) fn struct_field_index( + &self, + struct_name: &str, + field: &str, + ) -> Result { + let index = self + .program + .structs + .iter() + .find(|s| s.name == struct_name) + .and_then(|s| s.fields.iter().position(|f| f.name == field)) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown field `{}` on `{}`", + field, struct_name + )) + })?; + + u32::try_from(index).map_err(|_| { + CodegenError::UnsupportedType(format!("field index overflow on {}", struct_name)) + }) + } + + pub(crate) fn struct_field_type<'b>( + &'b self, + struct_name: &str, + field: &str, + ) -> Result<&'b AirType, CodegenError> { + self.program + .structs + .iter() + .find(|s| s.name == struct_name) + .and_then(|s| s.fields.iter().find(|f| f.name == field)) + .map(|f| &f.ty) + .ok_or_else(|| { + CodegenError::UnsupportedType(format!( + "unknown field `{}` on `{}`", + field, struct_name + )) + }) + } +} diff --git a/codegen/src/lowering/terminators.rs b/codegen/src/lowering/terminators.rs new file mode 100644 index 0000000..21d2723 --- /dev/null +++ 
b/codegen/src/lowering/terminators.rs @@ -0,0 +1,128 @@ +use crate::CodegenError; +use crate::lowering::body::FunctionCodegen; +use crate::lowering::operands::{constant_kind_name, is_signed_int_size}; +use aelys_air::{AirConst, AirTerminator, AirType}; +use inkwell::values::IntValue; + +impl<'a> FunctionCodegen<'a> { + pub(crate) fn generate_terminator(&mut self, term: &AirTerminator) -> Result<(), CodegenError> { + match term { + AirTerminator::Return(Some(operand)) => { + if matches!(self.air_function.ret_ty, AirType::Void) { + self.builder + .build_return(None) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + return Ok(()); + } + let value = self.generate_operand(operand)?; + if let Some(sret_ptr) = self.sret_ptr { + // C-convention sret: store into caller-provided slot, return void + self.store_value(sret_ptr, value)?; + self.builder + .build_return(None) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } else { + self.builder + .build_return(Some(&value)) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + } + Ok(()) + } + AirTerminator::Return(None) => { + if matches!(self.air_function.ret_ty, AirType::Void) { + self.builder + .build_return(None) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + return Ok(()); + } + self.builder + .build_unreachable() + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Goto(block) => { + self.builder + .build_unconditional_branch(self.lookup_block(*block)?) 
+ .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Branch { + cond, + then_block, + else_block, + } => { + let cond_value = self.generate_operand(cond)?.into_int_value(); + self.builder + .build_conditional_branch( + cond_value, + self.lookup_block(*then_block)?, + self.lookup_block(*else_block)?, + ) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Unreachable => { + self.builder + .build_unreachable() + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Panic { message, .. } => { + let panic_fn = self.ensure_panic_function(); + let (msg_ptr, msg_len) = self.global_string_ptr_len(message)?; + let msg_len = self.context.i64_type().const_int(msg_len, false); + self.builder + .build_call(panic_fn, &[msg_ptr.into(), msg_len.into()], "") + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + self.builder + .build_unreachable() + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Switch { + discr, + targets, + default, + } => { + let discr_value = self.generate_operand(discr)?.into_int_value(); + let mut cases = Vec::with_capacity(targets.len()); + for (constant, block) in targets { + cases.push(( + self.switch_case_value(discr_value, constant)?, + self.lookup_block(*block)?, + )); + } + + self.builder + .build_switch(discr_value, self.lookup_block(*default)?, &cases) + .map_err(|e| CodegenError::LlvmError(e.to_string()))?; + Ok(()) + } + AirTerminator::Invoke { .. 
} => Err(self.unsupported_air( + "AirTerminator::Invoke", + "invoke/unwind control flow is not implemented for LLVM backend", + )), + AirTerminator::Unwind => Err(self.unsupported_air( + "AirTerminator::Unwind", + "unwind terminator is not implemented for LLVM backend", + )), + } + } + + fn switch_case_value( + &self, + discr: IntValue<'static>, + constant: &AirConst, + ) -> Result, CodegenError> { + let ty = discr.get_type(); + match constant { + AirConst::IntLiteral(v) => Ok(ty.const_int(*v as u64, true)), + AirConst::Int(v, size) => Ok(ty.const_int(*v as u64, is_signed_int_size(*size))), + AirConst::Bool(v) => Ok(ty.const_int(u64::from(*v), false)), + other => Err(CodegenError::UnsupportedInstruction(format!( + "unsupported switch constant kind: {}", + constant_kind_name(other) + ))), + } + } +} diff --git a/codegen/src/types.rs b/codegen/src/types.rs new file mode 100644 index 0000000..84a6174 --- /dev/null +++ b/codegen/src/types.rs @@ -0,0 +1 @@ +pub use crate::infra::types::*; diff --git a/common/src/diagnostic/color.rs b/common/src/diagnostic/color.rs new file mode 100644 index 0000000..2dcde8a --- /dev/null +++ b/common/src/diagnostic/color.rs @@ -0,0 +1,81 @@ +use super::Severity; + +#[derive(Debug, Clone)] +pub struct ColorConfig { + pub use_color: bool, +} + +impl ColorConfig { + pub fn auto() -> Self { + let use_color = + !std::env::var("NO_COLOR").is_ok_and(|v| !v.is_empty()) && is_terminal_stderr(); + Self { use_color } + } + + pub fn always() -> Self { + Self { use_color: true } + } + + pub fn never() -> Self { + Self { use_color: false } + } + + /// Bold + severity color (red for error, yellow for warning, cyan for help, default for note) + pub fn severity(&self, severity: Severity, text: &str) -> String { + if !self.use_color { + return text.to_string(); + } + let color_code = match severity { + Severity::Error => "\x1b[1;31m", // bold red + Severity::Warning => "\x1b[1;33m", // bold yellow + Severity::Help => "\x1b[1;36m", // bold cyan + 
Severity::Note => "\x1b[1m", // bold (default color) + }; + format!("{}{}\x1b[0m", color_code, text) + } + + /// bold text (for error codes, etc.) + pub fn bold(&self, text: &str) -> String { + if !self.use_color { + return text.to_string(); + } + format!("\x1b[1m{}\x1b[0m", text) + } + + /// blue (for line numbers and gutters) + pub fn blue(&self, text: &str) -> String { + if !self.use_color { + return text.to_string(); + } + format!("\x1b[1;34m{}\x1b[0m", text) + } +} + +fn is_terminal_stderr() -> bool { + #[cfg(windows)] + { + use std::os::windows::io::AsRawHandle; + let handle = std::io::stderr().as_raw_handle(); + // if we can get console mode, it's a real console + unsafe { + let mut mode = 0u32; + windows_sys_get_console_mode(handle, &mut mode) + } + } + #[cfg(not(windows))] + { + // on unix, check if stderr is a tty + false // conservative default but we could use libc::isatty + } +} + +#[cfg(windows)] +unsafe fn windows_sys_get_console_mode( + handle: std::os::windows::io::RawHandle, + mode: &mut u32, +) -> bool { + unsafe extern "system" { + fn GetConsoleMode(hConsoleHandle: *mut std::ffi::c_void, lpMode: *mut u32) -> i32; + } + unsafe { GetConsoleMode(handle as *mut _, mode as *mut _) != 0 } +} diff --git a/common/src/diagnostic/mod.rs b/common/src/diagnostic/mod.rs index 8513712..a2310c6 100644 --- a/common/src/diagnostic/mod.rs +++ b/common/src/diagnostic/mod.rs @@ -1 +1,153 @@ -// TODO: LSP diagnostics, better error spans +pub mod color; +pub mod registry; +pub mod render; + +use aelys_syntax::{Source, Span}; +use std::fmt; +use std::sync::Arc; + +use self::color::ColorConfig; +use self::render::render_diagnostic; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Severity { + Error, + Warning, + Note, + Help, +} + +impl Severity { + pub fn as_str(self) -> &'static str { + match self { + Severity::Error => "error", + Severity::Warning => "warning", + Severity::Note => "note", + Severity::Help => "help", + } + } +} + +#[derive(Debug, Clone)] 
+pub struct Label { + pub source: Arc, + pub span: Span, + pub message: Option, + pub is_primary: bool, +} + +impl Label { + pub fn primary(source: Arc, span: Span, message: Option) -> Self { + Self { + source, + span, + message, + is_primary: true, + } + } + + pub fn secondary(source: Arc, span: Span, message: Option) -> Self { + Self { + source, + span, + message, + is_primary: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct Suggestion { + pub message: String, + pub replacements: Vec, +} + +#[derive(Debug, Clone)] +pub struct Replacement { + pub span: Span, + pub new_text: String, + pub source: Arc, +} + +#[derive(Debug, Clone)] +pub struct Diagnostic { + pub severity: Severity, + pub code: Option, + pub message: String, + pub labels: Vec