diff --git a/Cargo.lock b/Cargo.lock index c70d5cf2cf..43dfb188f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -286,7 +286,7 @@ dependencies = [ [[package]] name = "byte-array-literals" -version = "32.0.0" +version = "33.0.0" [[package]] name = "byteorder" @@ -680,7 +680,7 @@ dependencies = [ [[package]] name = "cranelift" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-codegen", "cranelift-frontend", @@ -693,7 +693,7 @@ dependencies = [ [[package]] name = "cranelift-assembler-x64" -version = "0.119.0" +version = "0.120.0" dependencies = [ "arbitrary", "arbtest", @@ -711,21 +711,21 @@ dependencies = [ [[package]] name = "cranelift-assembler-x64-meta" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-srcgen", ] [[package]] name = "cranelift-bforest" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-bitset" -version = "0.119.0" +version = "0.120.0" dependencies = [ "arbitrary", "serde", @@ -734,7 +734,7 @@ dependencies = [ [[package]] name = "cranelift-codegen" -version = "0.119.0" +version = "0.120.0" dependencies = [ "anyhow", "bumpalo", @@ -767,7 +767,7 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-assembler-x64-meta", "cranelift-codegen-shared", @@ -777,18 +777,18 @@ dependencies = [ [[package]] name = "cranelift-codegen-shared" -version = "0.119.0" +version = "0.120.0" [[package]] name = "cranelift-control" -version = "0.119.0" +version = "0.120.0" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-bitset", "serde", @@ -827,7 +827,7 @@ dependencies = [ [[package]] name = "cranelift-frontend" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-codegen", "env_logger 0.11.5", @@ -851,7 +851,7 @@ dependencies = [ [[package]] name = "cranelift-interpreter" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -865,7 +865,7 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.119.0" +version = "0.120.0" dependencies = [ "codespan-reporting", "log", @@ -874,7 +874,7 @@ dependencies = [ [[package]] name = "cranelift-jit" -version = "0.119.0" +version = "0.120.0" dependencies = [ "anyhow", "cranelift", @@ -895,7 +895,7 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.119.0" +version = "0.120.0" dependencies = [ "anyhow", "cranelift-codegen", @@ -907,7 +907,7 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.119.0" +version = "0.120.0" dependencies = [ "cranelift-codegen", "libc", @@ -916,7 +916,7 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.119.0" +version = "0.120.0" dependencies = [ "anyhow", "cranelift-codegen", @@ -931,7 +931,7 @@ dependencies = [ [[package]] name = "cranelift-reader" -version = "0.119.0" +version = "0.120.0" dependencies = [ "anyhow", "cranelift-codegen", @@ -941,7 +941,7 @@ dependencies = [ [[package]] name = "cranelift-serde" -version = "0.119.0" +version = "0.120.0" dependencies = [ "clap", "cranelift-codegen", @@ -951,7 +951,7 @@ dependencies = [ [[package]] name = "cranelift-srcgen" -version = "0.119.0" +version = "0.120.0" [[package]] name = "cranelift-tools" @@ -1215,7 +1215,7 @@ checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" [[package]] name = "embedding" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "dlmalloc", @@ -2308,7 +2308,7 @@ dependencies = [ [[package]] name = "min-platform-host" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "libloading", @@ -2701,7 +2701,7 @@ dependencies = [ [[package]] name = "pulley-interpreter" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "arbitrary", @@ -2850,9 +2850,9 @@ dependencies = [ [[package]] name = "regalloc2" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc06e6b318142614e4a48bc725abbf08ff166694835c43c9dae5a9009704639a" +checksum = "6d4c3c15aa088eccea44550bffea9e9a5d0b14a264635323d23c6e6351acca98" dependencies = [ "allocator-api2", "bumpalo", @@ -3895,7 +3895,7 @@ version = "0.1.0" [[package]] name = "verify-component-adapter" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "wasmparser 0.228.0", @@ -3954,7 +3954,7 @@ dependencies = [ [[package]] name = "wasi-common" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "bitflags 2.6.0", @@ -4003,7 +4003,7 @@ dependencies = [ [[package]] name = "wasi-preview1-component-adapter" -version = "32.0.0" +version = "33.0.0" dependencies = [ "bitflags 2.6.0", "byte-array-literals", @@ -4229,7 +4229,7 @@ dependencies = [ [[package]] name = "wasmtime" -version = "32.0.0" +version = "33.0.0" dependencies = [ "addr2line", "anyhow", @@ -4296,14 +4296,14 @@ dependencies = [ [[package]] name = "wasmtime-asm-macros" -version = "32.0.0" +version = "33.0.0" dependencies = [ "cfg-if", ] [[package]] name = "wasmtime-bench-api" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cap-std", @@ -4319,14 +4319,14 @@ dependencies = [ [[package]] name = "wasmtime-c-api" -version = "32.0.0" +version = "33.0.0" dependencies = [ "wasmtime-c-api-impl", ] [[package]] name = "wasmtime-c-api-impl" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cap-std", @@ -4343,7 +4343,7 @@ dependencies = [ [[package]] name = "wasmtime-c-api-macros" -version = "32.0.0" +version = "33.0.0" dependencies = [ "proc-macro2", "quote", @@ -4351,7 +4351,7 @@ dependencies = [ [[package]] name = "wasmtime-cache" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "base64", @@ -4372,7 +4372,7 @@ dependencies = [ [[package]] name = "wasmtime-cli" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "async-trait", @@ -4445,7 +4445,7 @@ dependencies = [ [[package]] name = "wasmtime-cli-flags" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "clap", @@ -4460,7 +4460,7 @@ dependencies = [ [[package]] name = "wasmtime-component-macro" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "component-macro-test-helpers", @@ -4480,11 +4480,11 @@ dependencies = [ [[package]] name = "wasmtime-component-util" -version = "32.0.0" +version = "33.0.0" [[package]] name = "wasmtime-cranelift" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cfg-if", @@ -4508,7 +4508,7 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "clap", @@ -4550,7 +4550,7 @@ dependencies = [ [[package]] name = "wasmtime-explorer" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "capstone", @@ -4565,7 +4565,7 @@ dependencies = [ [[package]] name = "wasmtime-fiber" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "backtrace", @@ -4636,7 +4636,7 @@ dependencies = [ [[package]] name = "wasmtime-jit-debug" -version = "32.0.0" +version = "33.0.0" dependencies = [ "cc", "object", @@ -4646,7 +4646,7 @@ dependencies = [ [[package]] name = "wasmtime-jit-icache-coherence" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cfg-if", @@ -4656,14 +4656,14 @@ dependencies = [ [[package]] name = "wasmtime-math" -version = "32.0.0" +version = "33.0.0" dependencies = [ "libm", ] [[package]] name = "wasmtime-slab" -version = "32.0.0" +version = "33.0.0" [[package]] name = "wasmtime-test-macros" @@ -4678,7 +4678,7 @@ dependencies = [ [[package]] name = "wasmtime-test-util" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "arbitrary", @@ -4696,7 +4696,7 @@ dependencies = [ [[package]] name = "wasmtime-versioned-export-macros" -version = "32.0.0" +version = "33.0.0" dependencies = [ "proc-macro2", "quote", @@ -4705,7 +4705,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "async-trait", @@ -4739,7 +4739,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-config" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "test-programs-artifacts", @@ -4750,7 +4750,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-http" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "async-trait", @@ -4776,7 +4776,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-io" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "async-trait", @@ -4787,7 +4787,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-keyvalue" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "test-programs-artifacts", @@ -4798,7 +4798,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-nn" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cap-std", @@ -4819,7 +4819,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-threads" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "log", @@ -4831,7 +4831,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-tls" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "bytes", @@ -4847,7 +4847,7 @@ dependencies = [ [[package]] name = "wasmtime-wast" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "log", @@ -4858,7 +4858,7 @@ dependencies = [ [[package]] name = "wasmtime-winch" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cranelift-codegen", @@ -4873,7 +4873,7 @@ dependencies = [ [[package]] name = "wasmtime-wit-bindgen" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "heck 0.5.0", @@ -4883,7 +4883,7 @@ dependencies = [ [[package]] name = "wasmtime-wmemcheck" -version = "32.0.0" +version = "33.0.0" [[package]] name = "wast" @@ -4947,7 +4947,7 @@ dependencies = [ [[package]] name = "wiggle" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "async-trait", @@ -4964,7 +4964,7 @@ dependencies = [ [[package]] name = "wiggle-generate" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "heck 0.5.0", @@ -4977,7 +4977,7 @@ dependencies = [ [[package]] name = "wiggle-macro" -version = "32.0.0" +version = "33.0.0" dependencies = [ "proc-macro2", "quote", @@ -5032,7 +5032,7 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "winch-codegen" -version = "32.0.0" +version = "33.0.0" dependencies = [ "anyhow", "cranelift-codegen", diff --git a/Cargo.toml b/Cargo.toml index 63e32cb656..3402e81560 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ exclude = [ ] [workspace.package] -version = "32.0.0" +version = "33.0.0" authors = ["The Wasmtime Project Developers"] edition = "2021" # Wasmtime's current policy is that this number can be no larger than the @@ -223,72 +223,72 @@ allow_attributes_without_reason = 'warn' [workspace.dependencies] arbitrary = { version = "1.4.0" } -wasmtime-wmemcheck = { path = "crates/wmemcheck", version = "=32.0.0" } -wasmtime = { path = "crates/wasmtime", version = "32.0.0", default-features = false } -wasmtime-c-api-macros = { path = "crates/c-api-macros", version = "=32.0.0" } -wasmtime-cache = { path = "crates/cache", version = "=32.0.0" } -wasmtime-cli-flags = { path = "crates/cli-flags", version = "=32.0.0" } -wasmtime-cranelift = { path = "crates/cranelift", version = "=32.0.0" } -wasmtime-winch = { path = "crates/winch", version = "=32.0.0" } -wasmtime-environ = { path = "crates/environ", version = "=32.0.0" } -wasmtime-explorer = { path = "crates/explorer", version = "=32.0.0" } -wasmtime-fiber = { path = "crates/fiber", version = "=32.0.0" } -wasmtime-jit-debug = { path = "crates/jit-debug", version = "=32.0.0" } -wasmtime-wast = { path = "crates/wast", version = "=32.0.0" } -wasmtime-wasi = { path = "crates/wasi", version = "32.0.0", default-features = false } -wasmtime-wasi-io = { path = "crates/wasi-io", version = "32.0.0", default-features = false } -wasmtime-wasi-http = { path = "crates/wasi-http", version = "32.0.0", default-features = false } -wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "32.0.0" } -wasmtime-wasi-config = { path = "crates/wasi-config", version = "32.0.0" } -wasmtime-wasi-keyvalue = { path = "crates/wasi-keyvalue", version = "32.0.0" } -wasmtime-wasi-threads = { path = "crates/wasi-threads", version = "32.0.0" } -wasmtime-component-util = { path = "crates/component-util", version = "=32.0.0" } -wasmtime-component-macro = { path = "crates/component-macro", version = "=32.0.0" } -wasmtime-asm-macros = { path = "crates/asm-macros", version = "=32.0.0" } -wasmtime-versioned-export-macros = { path = "crates/versioned-export-macros", version = "=32.0.0" } -wasmtime-slab = { path = "crates/slab", version = "=32.0.0" } -wasmtime-wasi-tls = { path = "crates/wasi-tls", version = "32.0.0" } -component-async-tests = { path = "crates/misc/component-async-tests" } -wiggle = { path = "crates/wiggle", version = "=32.0.0", default-features = false } -wiggle-macro = { path = "crates/wiggle/macro", version = "=32.0.0" } -wiggle-generate = { path = "crates/wiggle/generate", version = "=32.0.0" } -wasi-common = { path = "crates/wasi-common", version = "=32.0.0", default-features = false } +wasmtime-wmemcheck = { path = "crates/wmemcheck", version = "=33.0.0" } +wasmtime = { path = "crates/wasmtime", version = "33.0.0", default-features = false } +wasmtime-c-api-macros = { path = "crates/c-api-macros", version = "=33.0.0" } +wasmtime-cache = { path = "crates/cache", version = "=33.0.0" } +wasmtime-cli-flags = { path = "crates/cli-flags", version = "=33.0.0" } +wasmtime-cranelift = { path = "crates/cranelift", version = "=33.0.0" } +wasmtime-winch = { path = "crates/winch", version = "=33.0.0" } +wasmtime-environ = { path = "crates/environ", version = "=33.0.0" } +wasmtime-explorer = { path = "crates/explorer", version = "=33.0.0" } +wasmtime-fiber = { path = "crates/fiber", version = "=33.0.0" } +wasmtime-jit-debug = { path = "crates/jit-debug", version = "=33.0.0" } +wasmtime-wast = { path = "crates/wast", version = "=33.0.0" } +wasmtime-wasi = { path = "crates/wasi", version = "33.0.0", default-features = false } +wasmtime-wasi-io = { path = "crates/wasi-io", version = "33.0.0", default-features = false } +wasmtime-wasi-http = { path = "crates/wasi-http", version = "33.0.0", default-features = false } +wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "33.0.0" } +wasmtime-wasi-config = { path = "crates/wasi-config", version = "33.0.0" } +wasmtime-wasi-keyvalue = { path = "crates/wasi-keyvalue", version = "33.0.0" } +wasmtime-wasi-threads = { path = "crates/wasi-threads", version = "33.0.0" } +wasmtime-component-util = { path = "crates/component-util", version = "=33.0.0" } +wasmtime-component-macro = { path = "crates/component-macro", version = "=33.0.0" } +wasmtime-asm-macros = { path = "crates/asm-macros", version = "=33.0.0" } +wasmtime-versioned-export-macros = { path = "crates/versioned-export-macros", version = "=33.0.0" } +wasmtime-slab = { path = "crates/slab", version = "=33.0.0" } +wasmtime-wasi-tls = { path = "crates/wasi-tls", version = "33.0.0" } +wiggle = { path = "crates/wiggle", version = "=33.0.0", default-features = false } +wiggle-macro = { path = "crates/wiggle/macro", version = "=33.0.0" } +wiggle-generate = { path = "crates/wiggle/generate", version = "=33.0.0" } +wasi-common = { path = "crates/wasi-common", version = "=33.0.0", default-features = false } wasmtime-fuzzing = { path = "crates/fuzzing" } -wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=32.0.0" } -wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=32.0.0" } -wasmtime-math = { path = "crates/math", version = "=32.0.0" } +wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=33.0.0" } +wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=33.0.0" } +wasmtime-math = { path = "crates/math", version = "=33.0.0" } test-programs-artifacts = { path = 'crates/test-programs/artifacts' } wasmtime-test-util = { path = "crates/test-util" } +component-async-tests = { path = "crates/misc/component-async-tests" } -pulley-interpreter = { path = 'pulley', version = "=32.0.0" } +pulley-interpreter = { path = 'pulley', version = "=33.0.0" } pulley-interpreter-fuzz = { path = 'pulley/fuzz' } -cranelift-assembler-x64 = { path = "cranelift/assembler-x64", version = "0.119.0" } -cranelift-codegen = { path = "cranelift/codegen", version = "0.119.0", default-features = false, features = ["std", "unwind"] } -cranelift-frontend = { path = "cranelift/frontend", version = "0.119.0" } -cranelift-entity = { path = "cranelift/entity", version = "0.119.0" } -cranelift-native = { path = "cranelift/native", version = "0.119.0" } -cranelift-module = { path = "cranelift/module", version = "0.119.0" } -cranelift-interpreter = { path = "cranelift/interpreter", version = "0.119.0" } -cranelift-reader = { path = "cranelift/reader", version = "0.119.0" } +cranelift-assembler-x64 = { path = "cranelift/assembler-x64", version = "0.120.0" } +cranelift-codegen = { path = "cranelift/codegen", version = "0.120.0", default-features = false, features = ["std", "unwind"] } +cranelift-frontend = { path = "cranelift/frontend", version = "0.120.0" } +cranelift-entity = { path = "cranelift/entity", version = "0.120.0" } +cranelift-native = { path = "cranelift/native", version = "0.120.0" } +cranelift-module = { path = "cranelift/module", version = "0.120.0" } +cranelift-interpreter = { path = "cranelift/interpreter", version = "0.120.0" } +cranelift-reader = { path = "cranelift/reader", version = "0.120.0" } cranelift-filetests = { path = "cranelift/filetests" } -cranelift-object = { path = "cranelift/object", version = "0.119.0" } -cranelift-jit = { path = "cranelift/jit", version = "0.119.0" } +cranelift-object = { path = "cranelift/object", version = "0.120.0" } +cranelift-jit = { path = "cranelift/jit", version = "0.120.0" } cranelift-fuzzgen = { path = "cranelift/fuzzgen" } -cranelift-bforest = { path = "cranelift/bforest", version = "0.119.0" } -cranelift-bitset = { path = "cranelift/bitset", version = "0.119.0" } -cranelift-control = { path = "cranelift/control", version = "0.119.0" } -cranelift-srcgen = { path = "cranelift/srcgen", version = "0.119.0" } -cranelift = { path = "cranelift/umbrella", version = "0.119.0" } +cranelift-bforest = { path = "cranelift/bforest", version = "0.120.0" } +cranelift-bitset = { path = "cranelift/bitset", version = "0.120.0" } +cranelift-control = { path = "cranelift/control", version = "0.120.0" } +cranelift-srcgen = { path = "cranelift/srcgen", version = "0.120.0" } +cranelift = { path = "cranelift/umbrella", version = "0.120.0" } -winch-codegen = { path = "winch/codegen", version = "=32.0.0" } +winch-codegen = { path = "winch/codegen", version = "=33.0.0" } wasi-preview1-component-adapter = { path = "crates/wasi-preview1-component-adapter" } byte-array-literals = { path = "crates/wasi-preview1-component-adapter/byte-array-literals" } # Bytecode Alliance maintained dependencies: # --------------------------- -regalloc2 = "0.11.2" +regalloc2 = "0.11.3" # cap-std family: target-lexicon = "0.13.0" diff --git a/RELEASES.md b/RELEASES.md index f1c81d9a82..ae11cbe030 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,4 +1,4 @@ -## 32.0.0 +## 33.0.0 Unreleased. @@ -12,6 +12,7 @@ Release notes for previous releases of Wasmtime can be found on the respective release branches of the Wasmtime repository. +* [32.0.x](https://github.com/bytecodealliance/wasmtime/blob/release-32.0.0/RELEASES.md) * [31.0.x](https://github.com/bytecodealliance/wasmtime/blob/release-31.0.0/RELEASES.md) * [30.0.x](https://github.com/bytecodealliance/wasmtime/blob/release-30.0.0/RELEASES.md) * [29.0.x](https://github.com/bytecodealliance/wasmtime/blob/release-29.0.0/RELEASES.md) diff --git a/cranelift/assembler-x64/Cargo.toml b/cranelift/assembler-x64/Cargo.toml index f9b1ec2dfe..5a6540625f 100644 --- a/cranelift/assembler-x64/Cargo.toml +++ b/cranelift/assembler-x64/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-assembler-x64" description = "A Cranelift-specific x64 assembler" -version = "0.119.0" +version = "0.120.0" license = "Apache-2.0 WITH LLVM-exception" edition.workspace = true rust-version.workspace = true @@ -16,7 +16,7 @@ arbtest = "0.3.1" capstone = { workspace = true } [build-dependencies] -cranelift-assembler-x64-meta = { path = "meta", version = "0.119.0" } +cranelift-assembler-x64-meta = { path = "meta", version = "0.120.0" } [lints] workspace = true diff --git a/cranelift/assembler-x64/meta/Cargo.toml b/cranelift/assembler-x64/meta/Cargo.toml index 7706859db3..9f54b021a6 100644 --- a/cranelift/assembler-x64/meta/Cargo.toml +++ b/cranelift/assembler-x64/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-assembler-x64-meta" description = "Generate a Cranelift-specific assembler for x64 instructions" -version = "0.119.0" +version = "0.120.0" license = "Apache-2.0 WITH LLVM-exception" edition.workspace = true rust-version.workspace = true diff --git a/cranelift/bforest/Cargo.toml b/cranelift/bforest/Cargo.toml index b0eb3ad70f..7f4777c30c 100644 --- a/cranelift/bforest/Cargo.toml +++ b/cranelift/bforest/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-bforest" -version = "0.119.0" +version = "0.120.0" description = "A forest of B+-trees" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-bforest" diff --git a/cranelift/bitset/Cargo.toml b/cranelift/bitset/Cargo.toml index db52b2678f..d926a91ed6 100644 --- a/cranelift/bitset/Cargo.toml +++ b/cranelift/bitset/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-bitset" -version = "0.119.0" +version = "0.120.0" description = "Various bitset stuff for use inside Cranelift" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-bitset" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index fccc1354a6..d26e7b34a6 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-codegen" -version = "0.119.0" +version = "0.120.0" description = "Low-level code generator library" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-codegen" @@ -25,7 +25,7 @@ anyhow = { workspace = true, optional = true, features = ['std'] } bumpalo = "3" capstone = { workspace = true, optional = true } cranelift-assembler-x64 = { workspace = true } -cranelift-codegen-shared = { path = "./shared", version = "0.119.0" } +cranelift-codegen-shared = { path = "./shared", version = "0.120.0" } cranelift-entity = { workspace = true } cranelift-bforest = { workspace = true } cranelift-bitset = { workspace = true } @@ -54,8 +54,8 @@ similar = "2.1.0" env_logger = { workspace = true } [build-dependencies] -cranelift-codegen-meta = { path = "meta", version = "0.119.0" } -cranelift-isle = { path = "../isle/isle", version = "=0.119.0" } +cranelift-codegen-meta = { path = "meta", version = "0.120.0" } +cranelift-isle = { path = "../isle/isle", version = "=0.120.0" } [features] default = ["std", "unwind", "host-arch", "timing"] diff --git a/cranelift/codegen/meta/Cargo.toml b/cranelift/codegen/meta/Cargo.toml index 6b8fdbc5f9..e96fd66b88 100644 --- a/cranelift/codegen/meta/Cargo.toml +++ b/cranelift/codegen/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-codegen-meta" authors = ["The Cranelift Project Developers"] -version = "0.119.0" +version = "0.120.0" description = "Metaprogram for cranelift-codegen code generator library" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/bytecodealliance/wasmtime" @@ -17,8 +17,8 @@ rustdoc-args = ["--document-private-items"] [dependencies] cranelift-srcgen = { workspace = true } -cranelift-assembler-x64-meta = { path = "../../assembler-x64/meta", version = "0.119.0" } -cranelift-codegen-shared = { path = "../shared", version = "0.119.0" } +cranelift-assembler-x64-meta = { path = "../../assembler-x64/meta", version = "0.120.0" } +cranelift-codegen-shared = { path = "../shared", version = "0.120.0" } pulley-interpreter = { workspace = true, optional = true } [features] diff --git a/cranelift/codegen/shared/Cargo.toml b/cranelift/codegen/shared/Cargo.toml index 887771da8e..e14f5bb07d 100644 --- a/cranelift/codegen/shared/Cargo.toml +++ b/cranelift/codegen/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-codegen-shared" -version = "0.119.0" +version = "0.120.0" description = "For code shared between cranelift-codegen-meta and cranelift-codegen" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 9ade3ebea3..1c5eb7b96b 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1149,6 +1149,7 @@ impl ABIMachineSpec for AArch64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -1188,10 +1189,17 @@ impl ABIMachineSpec for AArch64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x9 as a temp if needed: clobbered, not a + // retval. + regs::writable_xreg(9) + } } impl AArch64MachineDeps { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index adc7396c47..0d6e11e32f 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2951,6 +2951,17 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { let user_stack_map = state.take_stack_map(); @@ -2970,6 +2981,17 @@ impl MachInstEmit for Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { emit_return_call_common_sequence(sink, emit_info, state, info); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ba7103a32a..84dae78933 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -839,8 +839,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -852,8 +855,11 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index a29c1146ca..0f4a4efbe2 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -405,14 +405,14 @@ where } fn gen_return( - _call_conv: isa::CallConv, + call_conv: isa::CallConv, _isa_flags: &PulleyFlags, frame_layout: &FrameLayout, ) -> SmallInstVec { let mut insts = SmallVec::new(); // Handle final stack adjustments for the tail-call ABI. - if frame_layout.tail_args_size > 0 { + if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 { insts.extend(Self::gen_sp_reg_adjust( frame_layout.tail_args_size.try_into().unwrap(), )); @@ -544,6 +544,7 @@ where is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -578,6 +579,7 @@ where setup_area_size: setup_area_size.into(), clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } @@ -592,6 +594,12 @@ where // Pulley doesn't need inline probestacks because it always checks stack // decrements. } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x15 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(15)) + } } /// Different styles of management of fp/lr and clobbered registers. diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index e68d68dfe2..bdca3c8d97 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -89,6 +89,9 @@ ;; emit this instruction and auto-generated methods for other various ;; bits and pieces of boilerplate in the backend. (Raw (raw RawInst)) + + ;; Island generation prior to variable-length instructions. + (EmitIsland (space_needed u32)) ) ) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 7adec74bcd..1a5095e6fe 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -188,6 +188,17 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::IndirectCall { info } => { @@ -204,6 +215,17 @@ fn pulley_emit

( for i in PulleyMachineDeps::

::gen_sp_reg_adjust(adjust) { >::from(i).emit(sink, emit_info, state); } + + // Load any stack-carried return values. + info.emit_retval_loads::, _, _>( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |space_needed| Some(>::from(Inst::EmitIsland { space_needed })), + ); + + // We produce an island above if needed, so disable + // the worst-case-size check in this case. + *start_offset = sink.cur_offset(); } Inst::ReturnCall { info } => { @@ -517,6 +539,13 @@ fn pulley_emit

( } super::generated::emit(raw, sink) } + + Inst::EmitIsland { space_needed } => { + let label = sink.get_label(); + >::from(Inst::Jump { label }).emit(sink, emit_info, state); + sink.emit_island(space_needed + 8, &mut state.ctrl_plane); + sink.bind_label(label, &mut state.ctrl_plane); + } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 78b1ea0eb8..1956b23678 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -169,8 +169,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -179,8 +182,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -190,8 +196,11 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -298,6 +307,8 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } Inst::Raw { raw } => generated::get_operands(raw, collector), + + Inst::EmitIsland { .. } => {} } } @@ -745,6 +756,8 @@ impl Inst { format!("br_table {idx} {default:?} {targets:?}") } Inst::Raw { raw } => generated::print(raw), + + Inst::EmitIsland { space_needed } => format!("emit_island {space_needed}"), } } } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 3bde5ea9bb..5364b78dd0 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -649,6 +649,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -684,6 +685,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } @@ -719,6 +721,12 @@ impl ABIMachineSpec for Riscv64MachineDeps { }); } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use x12 as a temp if needed: clobbered, not a + // retval. + Writable::from_reg(regs::x_reg(12)) + } } impl Riscv64ABICallSite { diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index acdfa35f5d..7d6b5c96ad 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -345,6 +345,10 @@ (flags MemFlags) (mask VecOpMasking) (vstate VState)) + + (EmitIsland + ;; The needed space before the next deadline. + (needed_space u32)) )) (type AtomicOP (enum diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 2951e2a8f6..36e72db5c2 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -210,6 +210,7 @@ impl Inst { // some cases. Inst::VecLoad { vstate, .. } | Inst::VecStore { vstate, .. } => Some(vstate), + Inst::EmitIsland { .. } => None, } } } @@ -1133,6 +1134,15 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::CallInd { ref info } => { Inst::Jalr { @@ -1155,6 +1165,15 @@ impl Inst { inst.emit(sink, emit_info, state); } } + + // Load any stack-carried return values. + info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, emit_info, state), + |needed_space| Some(Inst::EmitIsland { needed_space }), + ); + + *start_off = sink.cur_offset(); } &Inst::ReturnCall { ref info } => { @@ -2577,7 +2596,14 @@ impl Inst { to.nf(), )); } - }; + + Inst::EmitIsland { needed_space } => { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(needed_space + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 6e593f6901..d75e0c1c82 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -335,8 +335,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -348,8 +351,11 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(info.clobbers); } @@ -680,6 +686,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_use(from); vec_mask_operands(mask, collector); } + Inst::EmitIsland { .. } => {} } } @@ -1612,6 +1619,9 @@ impl Inst { format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") } + Inst::EmitIsland { needed_space } => { + format!("emit_island {needed_space}") + } } } } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 845599cb04..34999f1d79 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -264,6 +264,16 @@ impl Into for StackAMode { } } +/// Lane order to be used for a given calling convention. +impl From for LaneOrder { + fn from(call_conv: isa::CallConv) -> Self { + match call_conv { + isa::CallConv::Tail => LaneOrder::LittleEndian, + _ => LaneOrder::BigEndian, + } + } +} + /// S390x-specific ABI behavior. This struct just serves as an implementation /// point for the trait; it is never actually instantiated. pub struct S390xMachineDeps; @@ -914,6 +924,7 @@ impl ABIMachineSpec for S390xMachineDeps { _is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, mut outgoing_args_size: u32, ) -> FrameLayout { @@ -985,18 +996,23 @@ impl ABIMachineSpec for S390xMachineDeps { setup_area_size: 0, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + panic!("Should not be called"); + } } impl S390xMachineDeps { pub fn gen_tail_epilogue( frame_layout: &FrameLayout, callee_pop_size: u32, - target_reg: Option<&mut Reg>, - ) -> SmallVec<[Inst; 16]> { + dest: &CallInstDest, + ) -> (SmallVec<[Inst; 16]>, Option) { let mut insts = SmallVec::new(); let call_conv = isa::CallConv::Tail; @@ -1006,19 +1022,122 @@ impl S390xMachineDeps { // If the tail call target is in a callee-saved GPR, we need to move it // to %r1 (as the only available temp register) before restoring GPRs // (but after restoring FPRs, which might clobber %r1). - if let Some(reg) = target_reg { - if is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) { + let temp_dest = match dest { + CallInstDest::Indirect { reg } + if is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) => + { insts.push(Inst::Mov64 { rd: writable_gpr(1), rm: *reg, }); - *reg = gpr(1); + Some(gpr(1)) } - } + _ => None, + }; // Restore GPRs (including SP). insts.extend(gen_restore_gprs(call_conv, frame_layout, callee_pop_size)); + (insts, temp_dest) + } + + /// Emit loads for any stack-carried return values using the call + /// info and allocations. In addition, emit lane swaps for all + /// vector-types return values if needed. + pub fn gen_retval_loads(info: &CallInfo) -> SmallInstVec { + let mut insts = SmallVec::new(); + + // Helper routine to lane-swap a register if needed. + let lane_swap_if_needed = |insts: &mut SmallInstVec, vreg, ty: Type| { + if LaneOrder::from(info.caller_conv) != LaneOrder::from(info.callee_conv) { + if ty.is_vector() { + if ty.lane_count() >= 2 { + insts.push(Inst::VecPermuteDWImm { + rd: vreg, + rn: vreg.to_reg(), + rm: vreg.to_reg(), + idx1: 1, + idx2: 0, + }); + } + if ty.lane_count() >= 4 { + insts.push(Inst::VecShiftRR { + shift_op: VecShiftOp::RotL64x2, + rd: vreg, + rn: vreg.to_reg(), + shift_imm: 32, + shift_reg: zero_reg(), + }); + } + if ty.lane_count() >= 8 { + insts.push(Inst::VecShiftRR { + shift_op: VecShiftOp::RotL32x4, + rd: vreg, + rn: vreg.to_reg(), + shift_imm: 16, + shift_reg: zero_reg(), + }); + } + if ty.lane_count() >= 16 { + insts.push(Inst::VecShiftRR { + shift_op: VecShiftOp::RotL16x8, + rd: vreg, + rn: vreg.to_reg(), + shift_imm: 8, + shift_reg: zero_reg(), + }); + } + } + } + }; + + // Helper routine to allocate a temp register for ty. + let temp_reg = |ty| match Inst::rc_for_type(ty).unwrap() { + (&[RegClass::Int], _) => writable_gpr(0), + (&[RegClass::Float], _) => writable_vr(1), + _ => unreachable!(), + }; + + // Do a first pass over the return locations to handle copies that + // need temp registers. These need to be done before regular stack + // loads in case the destination of a load happens to be our temp + // register. (The temp registers by choice are distinct from all + // real return registers, which we verify here again.) + for CallRetPair { vreg, location } in &info.defs { + match location { + RetLocation::Reg(preg, ty) => { + debug_assert!(*preg != temp_reg(*ty).to_reg()); + } + RetLocation::Stack(amode, ty) => { + if let Some(spillslot) = vreg.to_reg().to_spillslot() { + let temp = temp_reg(*ty); + insts.push(Inst::gen_load(temp, (*amode).into(), *ty)); + lane_swap_if_needed(&mut insts, temp, *ty); + insts.push(Inst::gen_store( + MemArg::SpillOffset { + off: 8 * (spillslot.index() as i64), + }, + temp.to_reg(), + Inst::canonical_type_for_rc(temp.to_reg().class()), + )); + } + } + } + } + // Now handle all remaining return locations. + for CallRetPair { vreg, location } in &info.defs { + match location { + RetLocation::Reg(preg, ty) => { + lane_swap_if_needed(&mut insts, Writable::from_reg(*preg), *ty); + } + RetLocation::Stack(amode, ty) => { + if vreg.to_reg().to_spillslot().is_none() { + insts.push(Inst::gen_load(*vreg, (*amode).into(), *ty)); + lane_swap_if_needed(&mut insts, *vreg, *ty); + } + } + } + } insts } } diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 361aeded01..e0f56809a9 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -885,24 +885,15 @@ (AllocateArgs (size u32)) - ;; A machine call instruction. + ;; A machine call instruction (direct or indirect). (Call (link WritableReg) (info BoxCallInfo)) - ;; A machine indirect-call instruction. - (CallInd - (link WritableReg) - (info BoxCallIndInfo)) - - ;; A machine tail call instruction. + ;; A machine tail call instruction (direct or indirect). (ReturnCall (info BoxReturnCallInfo)) - ;; A machine indirect tail call instruction. - (ReturnCallInd - (info BoxReturnCallIndInfo)) - ;; A pseudo-instruction that captures register arguments in vregs. (Args (args VecArgPair)) @@ -1018,12 +1009,25 @@ ;; Primitive types used in instruction formats. (type BoxCallInfo (primitive BoxCallInfo)) -(type BoxCallIndInfo (primitive BoxCallIndInfo)) (type BoxReturnCallInfo (primitive BoxReturnCallInfo)) -(type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo)) (type BoxJTSequenceInfo (primitive BoxJTSequenceInfo)) (type VecMachLabel extern (enum)) +;; Information about a call site. +(type CallSiteInfo (primitive CallSiteInfo)) +(decl call_site_info (BoxCallInfo InstOutput) CallSiteInfo) +(extern extractor infallible call_site_info call_site_info_split) + +;; The destination of a call instruction. +(type CallInstDest + (enum + ;; Direct call. + (Direct + (name ExternalName)) + ;; Indirect call. + (Indirect + (reg Reg)))) + ;; A symbol reference carrying relocation information. (type SymbolReloc (enum @@ -2665,25 +2669,16 @@ dst)) ;; Helper for emitting `MInst.Call` instructions. -(decl call_impl (WritableReg BoxCallInfo) SideEffectNoResult) -(rule (call_impl reg info) - (SideEffectNoResult.Inst (MInst.Call reg info))) - -;; Helper for emitting `MInst.CallInd` instructions. -(decl call_ind_impl (WritableReg BoxCallIndInfo) SideEffectNoResult) -(rule (call_ind_impl reg info) - (SideEffectNoResult.Inst (MInst.CallInd reg info))) +(decl call_impl (WritableReg CallSiteInfo) InstOutput) +(rule (call_impl reg (call_site_info info output)) + (let ((_ Unit (emit (MInst.Call reg info)))) + output)) ;; Helper for emitting `MInst.ReturnCall` instructions. (decl return_call_impl (BoxReturnCallInfo) SideEffectNoResult) (rule (return_call_impl info) (SideEffectNoResult.Inst (MInst.ReturnCall info))) -;; Helper for emitting `MInst.ReturnCallInd` instructions. -(decl return_call_ind_impl (BoxReturnCallIndInfo) SideEffectNoResult) -(rule (return_call_ind_impl info) - (SideEffectNoResult.Inst (MInst.ReturnCallInd info))) - ;; Helper for emitting `MInst.Jump` instructions. (decl jump_impl (MachLabel) SideEffectNoResult) (rule (jump_impl target) @@ -2846,15 +2841,6 @@ (rule (arg_store $F64 reg mem) (vec_store_lane $F64X2 reg mem 0)) (rule -1 (arg_store (vr128_ty ty) reg mem) (vec_store reg mem)) -(decl arg_load (Type MemArg) Reg) -(rule (arg_load $I8 mem) (zext32_mem $I8 mem)) -(rule (arg_load $I16 mem) (zext32_mem $I16 mem)) -(rule (arg_load $I32 mem) (load32 mem)) -(rule (arg_load $I64 mem) (load64 mem)) -(rule (arg_load $F32 mem) (vec_load_lane_undef $F32X4 mem 0)) -(rule (arg_load $F64 mem) (vec_load_lane_undef $F64X2 mem 0)) -(rule -1 (arg_load (vr128_ty ty) mem) (vec_load ty mem)) - ;; Helper to perform a lane swap in register. (decl vec_elt_rev (Type Reg) Reg) (rule (vec_elt_rev (multi_lane 64 2) reg) @@ -2900,11 +2886,6 @@ (let ((ptr Reg (load_addr (memarg_offset base offset)))) (copy_reg_to_arg_slot uses lo base slot ptr))) -;; Copy a single argument/return value from its slots. -(decl copy_from_arg (CallRetList LaneOrder MemArg ABIArg) ValueRegs) -(rule (copy_from_arg defs lo base (abi_arg_only_slot slot)) - (value_reg (copy_reg_from_arg_slot defs lo base slot))) - ;; Place one component of an argument/return value into a register. ;; Copy reference values into registers of integer type. ;; Zero- or sign-extend as required by the ABI. @@ -2932,14 +2913,6 @@ (side_effect (arg_store (abi_ext_ty ext ty) (abi_vec_elt_rev lo ty src) (memarg_offset base offset)))) -;; Copy one component of an argument/return value from its slot. -(decl copy_reg_from_arg_slot (CallRetList LaneOrder MemArg ABIArgSlot) Reg) -(rule (copy_reg_from_arg_slot defs lo _ (ABIArgSlot.Reg reg ty ext)) - (abi_vec_elt_rev lo ty (defs_lookup defs reg))) -(rule (copy_reg_from_arg_slot _ lo base (ABIArgSlot.Stack offset ty ext)) - (abi_vec_elt_rev lo ty (arg_load (abi_ext_ty ext ty) - (memarg_offset base offset)))) - ;; Helper to compute the type of an implicitly extended argument/return value. (decl abi_ext_ty (ArgumentExtension Type) Type) (rule 0 (abi_ext_ty _ ty) ty) @@ -3499,35 +3472,15 @@ (decl args_builder_finish (CallArgListBuilder) CallArgList) (extern constructor args_builder_finish args_builder_finish) -;; List of return registers for a call instnuction. -(type CallRetList extern (enum)) - -;; Initialize return register list. -(decl defs_init (Sig) CallRetList) -(extern constructor defs_init defs_init) - -;; Look up return register in list. -(decl defs_lookup (CallRetList RealReg) Reg) -(extern constructor defs_lookup defs_lookup) - (decl abi_sig (SigRef) Sig) (extern constructor abi_sig abi_sig) -(decl abi_first_ret (SigRef Sig) usize) -(extern constructor abi_first_ret abi_first_ret) - -(decl abi_call_info (Sig ExternalName CallArgList CallRetList) BoxCallInfo) -(extern constructor abi_call_info abi_call_info) +(decl abi_call_site_info (Sig CallInstDest CallArgList) CallSiteInfo) +(extern constructor abi_call_site_info abi_call_site_info) -(decl abi_call_ind_info (Sig Reg CallArgList CallRetList) BoxCallIndInfo) -(extern constructor abi_call_ind_info abi_call_ind_info) - -(decl abi_return_call_info (Sig ExternalName CallArgList) BoxReturnCallInfo) +(decl abi_return_call_info (Sig CallInstDest CallArgList) BoxReturnCallInfo) (extern constructor abi_return_call_info abi_return_call_info) -(decl abi_return_call_ind_info (Sig Reg CallArgList) BoxReturnCallIndInfo) -(extern constructor abi_return_call_ind_info abi_return_call_ind_info) - (decl abi_call_stack_args (Sig) MemArg) (extern constructor abi_call_stack_args abi_call_stack_args) @@ -3540,21 +3493,14 @@ (decl writable_link_reg () WritableReg) (rule (writable_link_reg) (writable_gpr 14)) -(decl abi_call (Sig ExternalName CallArgList CallRetList) SideEffectNoResult) -(rule (abi_call abi name uses defs) - (call_impl (writable_link_reg) (abi_call_info abi name uses defs))) - -(decl abi_call_ind (Sig Reg CallArgList CallRetList) SideEffectNoResult) -(rule (abi_call_ind abi target uses defs) - (call_ind_impl (writable_link_reg) (abi_call_ind_info abi target uses defs))) - -(decl abi_return_call (Sig ExternalName CallArgList) SideEffectNoResult) -(rule (abi_return_call abi name uses) - (return_call_impl (abi_return_call_info abi name uses))) +(decl abi_call (Sig CallInstDest CallArgList) InstOutput) +(rule (abi_call abi dest uses) + (let ((info CallSiteInfo (abi_call_site_info abi dest uses))) + (call_impl (writable_link_reg) info))) -(decl abi_return_call_ind (Sig Reg CallArgList) SideEffectNoResult) -(rule (abi_return_call_ind abi target uses) - (return_call_ind_impl (abi_return_call_ind_info abi target uses))) +(decl abi_return_call (Sig CallInstDest CallArgList) SideEffectNoResult) +(rule (abi_return_call abi dest uses) + (return_call_impl (abi_return_call_info abi dest uses))) (decl abi_lane_order (Sig) LaneOrder) (extern constructor abi_lane_order abi_lane_order) diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs index 5134f9bd39..c4676521c2 100644 --- a/cranelift/codegen/src/isa/s390x/inst/args.rs +++ b/cranelift/codegen/src/isa/s390x/inst/args.rs @@ -63,6 +63,9 @@ pub enum MemArg { /// adjustment meta-instructions). See the diagram in the documentation /// for [crate::isa::aarch64::abi](the ABI module) for more details. SlotOffset { off: i64 }, + + /// Offset into the spill area of the stack. + SpillOffset { off: i64 }, } impl MemArg { @@ -102,6 +105,7 @@ impl MemArg { &MemArg::InitialSPOffset { off } => MemArg::InitialSPOffset { off: off + offset }, &MemArg::NominalSPOffset { off } => MemArg::NominalSPOffset { off: off + offset }, &MemArg::SlotOffset { off } => MemArg::SlotOffset { off: off + offset }, + &MemArg::SpillOffset { off } => MemArg::SpillOffset { off: off + offset }, // This routine is only defined for virtual addressing modes. &MemArg::BXD12 { .. } | &MemArg::BXD20 { .. } @@ -120,6 +124,7 @@ impl MemArg { MemArg::InitialSPOffset { .. } => MemFlags::trusted(), MemArg::NominalSPOffset { .. } => MemFlags::trusted(), MemArg::SlotOffset { .. } => MemFlags::trusted(), + MemArg::SpillOffset { .. } => MemFlags::trusted(), } } } @@ -244,6 +249,7 @@ impl PrettyPrint for MemArg { &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } | &MemArg::SlotOffset { .. } + | &MemArg::SpillOffset { .. } | &MemArg::RegOffset { .. } => { panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") } diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 5e4bbb8fd3..a564d2f2be 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -68,12 +68,14 @@ pub fn mem_finalize( &MemArg::RegOffset { off, .. } | &MemArg::InitialSPOffset { off } | &MemArg::NominalSPOffset { off } - | &MemArg::SlotOffset { off } => { + | &MemArg::SlotOffset { off } + | &MemArg::SpillOffset { off } => { let base = match mem { &MemArg::RegOffset { reg, .. } => reg, &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } - | &MemArg::SlotOffset { .. } => stack_reg(), + | &MemArg::SlotOffset { .. } + | &MemArg::SpillOffset { .. } => stack_reg(), _ => unreachable!(), }; let adj = match mem { @@ -83,6 +85,11 @@ pub fn mem_finalize( + state.frame_layout().outgoing_args_size + state.nominal_sp_offset, ), + &MemArg::SpillOffset { .. } => i64::from( + state.frame_layout().stackslots_size + + state.frame_layout().outgoing_args_size + + state.nominal_sp_offset, + ), &MemArg::SlotOffset { .. } => { i64::from(state.frame_layout().outgoing_args_size + state.nominal_sp_offset) } @@ -1369,13 +1376,6 @@ impl Inst { ) } - // N.B.: we *must* not exceed the "worst-case size" used to compute - // where to insert islands, except when islands are explicitly triggered - // (with an `EmitIsland`). We check this in debug builds. This is `mut` - // to allow disabling the check for `JTSequence`, which is always - // emitted following an `EmitIsland`. - let mut start_off = sink.cur_offset(); - match self { &Inst::AluRRR { alu_op, rd, rn, rm } => { let (opcode, have_rr) = match alu_op { @@ -3199,63 +3199,54 @@ impl Inst { state.nominal_sp_offset += size; } &Inst::Call { link, ref info } => { - let opcode = 0xc05; // BRASL - - // Add relocation for target function. This has to be done *before* - // the S390xTlsGdCall relocation if any, to ensure linker relaxation - // works correctly. - let offset = sink.cur_offset() + 2; - sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, &info.dest, 2); - + let enc: &[u8] = match &info.dest { + CallInstDest::Direct { name } => { + let offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, name, 2); + let opcode = 0xc05; // BRASL + &enc_ril_b(opcode, link.to_reg(), 0) + } + CallInstDest::Indirect { reg } => { + let opcode = 0x0d; // BASR + &enc_rr(opcode, link.to_reg(), *reg) + } + }; if let Some(s) = state.take_stack_map() { - let offset = sink.cur_offset() + 6; + let offset = sink.cur_offset() + enc.len() as u32; sink.push_user_stack_map(state, offset, s); } - - put(sink, &enc_ril_b(opcode, link.to_reg(), 0)); + put(sink, enc); sink.add_call_site(); state.nominal_sp_offset -= info.callee_pop_size; - } - &Inst::CallInd { link, ref info } => { - if let Some(s) = state.take_stack_map() { - let offset = sink.cur_offset() + 2; - sink.push_user_stack_map(state, offset, s); - } - let opcode = 0x0d; // BASR - put(sink, &enc_rr(opcode, link.to_reg(), info.dest)); - sink.add_call_site(); - - state.nominal_sp_offset -= info.callee_pop_size; - } - &Inst::ReturnCall { ref info } => { - for inst in S390xMachineDeps::gen_tail_epilogue( - state.frame_layout(), - info.callee_pop_size, - None, - ) { + for inst in S390xMachineDeps::gen_retval_loads(info) { inst.emit(sink, emit_info, state); } - - let opcode = 0xc04; // BCRL - let offset = sink.cur_offset() + 2; - sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, &info.dest, 2); - put(sink, &enc_ril_c(opcode, 15, 0)); - sink.add_call_site(); } - &Inst::ReturnCallInd { ref info } => { - let mut rn = info.dest; - for inst in S390xMachineDeps::gen_tail_epilogue( + &Inst::ReturnCall { ref info } => { + let (epilogue_insts, temp_dest) = S390xMachineDeps::gen_tail_epilogue( state.frame_layout(), info.callee_pop_size, - Some(&mut rn), - ) { + &info.dest, + ); + for inst in epilogue_insts { inst.emit(sink, emit_info, state); } - let opcode = 0x07; // BCR - put(sink, &enc_rr(opcode, gpr(15), rn)); + let enc: &[u8] = match &info.dest { + CallInstDest::Direct { name } => { + let offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, name, 2); + let opcode = 0xc04; // BCRL + &enc_ril_c(opcode, 15, 0) + } + CallInstDest::Indirect { reg } => { + let opcode = 0x07; // BCR + &enc_rr(opcode, gpr(15), temp_dest.unwrap_or(*reg)) + } + }; + put(sink, enc); sink.add_call_site(); } &Inst::ElfTlsGetOffset { ref symbol, .. } => { @@ -3389,10 +3380,6 @@ impl Inst { sink.use_label_at_offset(word_off, target, LabelUse::PCRel32); sink.put4(off_into_table.swap_bytes()); } - - // Lowering produces an EmitIsland before using a JTSequence, so we can safely - // disable the worst-case-size check in this case. - start_off = sink.cur_offset(); } Inst::StackProbeLoop { @@ -3432,9 +3419,6 @@ impl Inst { &Inst::DummyUse { .. } => {} } - let end_off = sink.cur_offset(); - debug_assert!((end_off - start_off) <= Inst::worst_case_size()); - state.clear_post_insn(); } } diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index caaec06707..64b2c44d55 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -6853,7 +6853,9 @@ fn test_s390x_binemit() { Inst::Call { link: writable_gpr(14), info: Box::new(CallInfo::empty( - ExternalName::testcase("test0"), + CallInstDest::Direct { + name: ExternalName::testcase("test0"), + }, CallConv::SystemV, )), }, @@ -6862,9 +6864,12 @@ fn test_s390x_binemit() { )); insns.push(( - Inst::CallInd { + Inst::Call { link: writable_gpr(14), - info: Box::new(CallInfo::empty(gpr(1), CallConv::SystemV)), + info: Box::new(CallInfo::empty( + CallInstDest::Indirect { reg: gpr(1) }, + CallConv::SystemV, + )), }, "0DE1", "basr %r14, %r1", diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 139255c265..a8fc78cd86 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -29,9 +29,9 @@ mod emit_tests; // Instructions (top level): definition pub use crate::isa::s390x::lower::isle::generated_code::{ - ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, LaneOrder, MInst as Inst, - RxSBGOp, ShiftOp, SymbolReloc, UnaryOp, VecBinaryOp, VecFloatCmpOp, VecIntCmpOp, VecShiftOp, - VecUnaryOp, + ALUOp, CallInstDest, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, LaneOrder, + MInst as Inst, RxSBGOp, ShiftOp, SymbolReloc, UnaryOp, VecBinaryOp, VecFloatCmpOp, VecIntCmpOp, + VecShiftOp, VecUnaryOp, }; /// Additional information for (direct) ReturnCall instructions, left out of line to lower the size of @@ -212,9 +212,7 @@ impl Inst { | Inst::VecReplicateLane { .. } | Inst::AllocateArgs { .. } | Inst::Call { .. } - | Inst::CallInd { .. } | Inst::ReturnCall { .. } - | Inst::ReturnCallInd { .. } | Inst::Args { .. } | Inst::Rets { .. } | Inst::Ret { .. } @@ -385,7 +383,8 @@ fn memarg_operands(memarg: &mut MemArg, collector: &mut impl OperandVisitor) { } MemArg::InitialSPOffset { .. } | MemArg::NominalSPOffset { .. } - | MemArg::SlotOffset { .. } => {} + | MemArg::SlotOffset { .. } + | MemArg::SpillOffset { .. } => {} } // mem_finalize might require %r1 to hold (part of) the address. // Conservatively assume this will always be necessary here. @@ -868,24 +867,6 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor {} Inst::Call { link, info, .. } => { - let CallInfo { - uses, - defs, - clobbers, - .. - } = &mut **info; - for CallArgPair { vreg, preg } in uses { - collector.reg_fixed_use(vreg, *preg); - } - let mut clobbers = *clobbers; - clobbers.add(link.to_reg().to_real_reg().unwrap().into()); - for CallRetPair { vreg, preg } in defs { - clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); - collector.reg_fixed_def(vreg, *preg); - } - collector.reg_clobbers(clobbers); - } - Inst::CallInd { link, info } => { let CallInfo { dest, uses, @@ -893,27 +874,32 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor {} + CallInstDest::Indirect { reg } => collector.reg_use(reg), + } for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } let mut clobbers = *clobbers; clobbers.add(link.to_reg().to_real_reg().unwrap().into()); - for CallRetPair { vreg, preg } in defs { - clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + collector.reg_fixed_def(vreg, *preg); + } + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(clobbers); } Inst::ReturnCall { info } => { - let ReturnCallInfo { uses, .. } = &mut **info; - for CallArgPair { vreg, preg } in uses { - collector.reg_fixed_use(vreg, *preg); - } - } - Inst::ReturnCallInd { info } => { let ReturnCallInfo { dest, uses, .. } = &mut **info; - collector.reg_use(dest); + match dest { + CallInstDest::Direct { .. } => {} + CallInstDest::Indirect { reg } => collector.reg_use(reg), + } for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } @@ -1060,7 +1046,6 @@ impl MachInst for Inst { match self { &Inst::Args { .. } => false, &Inst::Call { ref info, .. } => info.caller_conv != info.callee_conv, - &Inst::CallInd { ref info, .. } => info.caller_conv != info.callee_conv, &Inst::ElfTlsGetOffset { .. } => false, _ => true, } @@ -1083,7 +1068,7 @@ impl MachInst for Inst { fn is_term(&self) -> MachTerminator { match self { &Inst::Rets { .. } => MachTerminator::Ret, - &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, + &Inst::ReturnCall { .. } => MachTerminator::RetCall, &Inst::Jump { .. } => MachTerminator::Uncond, &Inst::CondBr { .. } => MachTerminator::Cond, &Inst::IndirectBr { .. } => MachTerminator::Indirect, @@ -1098,7 +1083,7 @@ impl MachInst for Inst { fn is_safepoint(&self) -> bool { match self { - Inst::Call { .. } | Inst::CallInd { .. } => true, + Inst::Call { .. } => true, _ => false, } } @@ -3119,44 +3104,58 @@ impl Inst { } &Inst::Call { link, ref info } => { let link = link.to_reg(); + let (opcode, dest) = match &info.dest { + CallInstDest::Direct { name } => ("brasl", name.display(None).to_string()), + CallInstDest::Indirect { reg } => ("basr", pretty_print_reg(*reg)), + }; + let mut retval_loads = S390xMachineDeps::gen_retval_loads(info) + .into_iter() + .map(|inst| inst.print_with_state(state)) + .collect::>() + .join(" ; "); + if !retval_loads.is_empty() { + retval_loads = " ; ".to_string() + &retval_loads; + } let callee_pop_size = if info.callee_pop_size > 0 { format!(" ; callee_pop_size {}", info.callee_pop_size) } else { "".to_string() }; format!( - "brasl {}, {}{}", + "{} {}, {}{}{}", + opcode, show_reg(link), - info.dest.display(None), - callee_pop_size + dest, + callee_pop_size, + retval_loads ) } - &Inst::CallInd { link, ref info, .. } => { - let link = link.to_reg(); - let rn = pretty_print_reg(info.dest); - let callee_pop_size = if info.callee_pop_size > 0 { - format!(" ; callee_pop_size {}", info.callee_pop_size) - } else { - "".to_string() - }; - format!("basr {}, {}{}", show_reg(link), rn, callee_pop_size) - } &Inst::ReturnCall { ref info } => { - let callee_pop_size = if info.callee_pop_size > 0 { - format!(" ; callee_pop_size {}", info.callee_pop_size) - } else { - "".to_string() + let (epilogue_insts, temp_dest) = S390xMachineDeps::gen_tail_epilogue( + state.frame_layout(), + info.callee_pop_size, + &info.dest, + ); + let mut epilogue_str = epilogue_insts + .into_iter() + .map(|inst| inst.print_with_state(state)) + .collect::>() + .join(" ; "); + if !epilogue_str.is_empty() { + epilogue_str += " ; "; + } + let (opcode, dest) = match &info.dest { + CallInstDest::Direct { name } => ("jg", name.display(None).to_string()), + CallInstDest::Indirect { reg } => { + ("br", pretty_print_reg(temp_dest.unwrap_or(*reg))) + } }; - format!("return_call {}{}", info.dest.display(None), callee_pop_size) - } - &Inst::ReturnCallInd { ref info } => { - let rn = pretty_print_reg(info.dest); let callee_pop_size = if info.callee_pop_size > 0 { format!(" ; callee_pop_size {}", info.callee_pop_size) } else { "".to_string() }; - format!("return_call_ind {rn}{callee_pop_size}") + format!("{epilogue_str}{opcode} {dest}{callee_pop_size}") } &Inst::ElfTlsGetOffset { ref symbol, .. } => { let dest = match &**symbol { diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index f3bcfc6ec4..3d85be99fc 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -3865,31 +3865,22 @@ ;; Direct call to an in-range function. (rule 1 (lower (call (func_ref_data sig_ref name (reloc_distance_near)) args)) (let ((abi Sig (abi_sig sig_ref)) - (uses CallArgList (lower_call_args abi (range 0 (abi_num_args abi)) args)) - (defs CallRetList (defs_init abi)) - (_ InstOutput (side_effect (abi_call abi name uses defs)))) - (lower_call_rets abi defs (range (abi_first_ret sig_ref abi) - (abi_num_rets abi)) (output_builder_new)))) + (uses CallArgList (lower_call_args abi (range 0 (abi_num_args abi)) args))) + (abi_call abi (CallInstDest.Direct name) uses))) ;; Direct call to an out-of-range function (implicitly via pointer). (rule (lower (call (func_ref_data sig_ref name _) args)) (let ((abi Sig (abi_sig sig_ref)) (uses CallArgList (lower_call_args abi (range 0 (abi_num_args abi)) args)) - (defs CallRetList (defs_init abi)) - (target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0))) - (_ InstOutput (side_effect (abi_call_ind abi target uses defs)))) - (lower_call_rets abi defs (range (abi_first_ret sig_ref abi) - (abi_num_rets abi)) (output_builder_new)))) + (target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0)))) + (abi_call abi (CallInstDest.Indirect target) uses))) ;; Indirect call. (rule (lower (call_indirect sig_ref ptr args)) (let ((abi Sig (abi_sig sig_ref)) (target Reg (put_in_reg ptr)) - (uses CallArgList (lower_call_args abi (range 0 (abi_num_args abi)) args)) - (defs CallRetList (defs_init abi)) - (_ InstOutput (side_effect (abi_call_ind abi target uses defs)))) - (lower_call_rets abi defs (range (abi_first_ret sig_ref abi) - (abi_num_rets abi)) (output_builder_new)))) + (uses CallArgList (lower_call_args abi (range 0 (abi_num_args abi)) args))) + (abi_call abi (CallInstDest.Indirect target) uses))) ;; Lower function arguments. (decl lower_call_args (Sig Range ValueSlice) CallArgList) @@ -3901,16 +3892,6 @@ (_ InstOutput (lower_call_ret_arg abi uses stack))) (args_builder_finish uses))) -;; Lower function return values by collecting them from registers / stack slots. -(decl lower_call_rets (Sig CallRetList Range InstOutputBuilder) InstOutput) -(rule (lower_call_rets abi _ (range_empty) builder) (output_builder_finish builder)) -(rule (lower_call_rets abi defs (range_unwrap head tail) builder) - (let ((ret ValueRegs (copy_from_arg defs (abi_lane_order abi) - (abi_call_stack_rets abi) - (abi_get_ret abi head))) - (_ Unit (output_builder_push builder ret))) - (lower_call_rets abi defs tail builder))) - ;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; @@ -3918,21 +3899,21 @@ (rule 1 (lower (return_call (func_ref_data sig_ref name (reloc_distance_near)) args)) (let ((abi Sig (abi_sig sig_ref)) (uses CallArgList (lower_return_call_args abi (range 0 (abi_num_args abi)) args))) - (side_effect (abi_return_call abi name uses)))) + (side_effect (abi_return_call abi (CallInstDest.Direct name) uses)))) ;; Direct tail call to an out-of-range function (implicitly via pointer). (rule (lower (return_call (func_ref_data sig_ref name _) args)) (let ((abi Sig (abi_sig sig_ref)) (uses CallArgList (lower_return_call_args abi (range 0 (abi_num_args abi)) args)) (target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0)))) - (side_effect (abi_return_call_ind abi target uses)))) + (side_effect (abi_return_call abi (CallInstDest.Indirect target) uses)))) ;; Indirect tail call. (rule (lower (return_call_indirect sig_ref ptr args)) (let ((abi Sig (abi_sig sig_ref)) (target Reg (put_in_reg ptr)) (uses CallArgList (lower_return_call_args abi (range 0 (abi_num_args abi)) args))) - (side_effect (abi_return_call_ind abi target uses)))) + (side_effect (abi_return_call abi (CallInstDest.Indirect target) uses)))) ;; Lower tail call function arguments. (decl lower_return_call_args (Sig Range ValueSlice) CallArgList) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index c65aa9c218..4226c8f983 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -7,12 +7,12 @@ use crate::ir::ExternalName; // Types that the generated ISLE code uses via `use super::*`. use crate::isa::s390x::abi::{S390xMachineDeps, REG_SAVE_AREA_SIZE}; use crate::isa::s390x::inst::{ - gpr, stack_reg, writable_gpr, zero_reg, Cond, Inst as MInst, LaneOrder, MemArg, RegPair, - ReturnCallInfo, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, WritableRegPair, + gpr, stack_reg, writable_gpr, zero_reg, CallInstDest, Cond, Inst as MInst, LaneOrder, MemArg, + RegPair, ReturnCallInfo, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, WritableRegPair, }; use crate::isa::s390x::S390xBackend; -use crate::machinst::isle::*; -use crate::machinst::{CallInfo, MachLabel, Reg}; +use crate::machinst::{isle::*, RetLocation}; +use crate::machinst::{CallInfo, MachLabel, Reg, StackAMode}; use crate::{ ir::{ condcodes::*, immediates::*, types::*, ArgumentExtension, ArgumentPurpose, AtomicRmwOp, @@ -32,10 +32,8 @@ use std::boxed::Box; use std::cell::Cell; use std::vec::Vec; -type BoxCallInfo = Box>; -type BoxCallIndInfo = Box>; -type BoxReturnCallInfo = Box>; -type BoxReturnCallIndInfo = Box>; +type BoxCallInfo = Box>; +type BoxReturnCallInfo = Box>; type VecMachLabel = Vec; type BoxExternalName = Box; type BoxSymbolReloc = Box; @@ -43,6 +41,7 @@ type VecMInst = Vec; type VecMInstBuilder = Cell>; type VecArgPair = Vec; type CallArgListBuilder = Cell; +type CallSiteInfo = (BoxCallInfo, InstOutput); /// The main entry point for lowering with ISLE. pub(crate) fn lower( @@ -118,57 +117,12 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { builder.take() } - fn defs_init(&mut self, abi: Sig) -> CallRetList { - // Allocate writable registers for all retval regs, except for StructRet args. - let mut defs = smallvec![]; - for i in 0..self.lower_ctx.sigs().num_rets(abi) { - if let &ABIArg::Slots { - ref slots, purpose, .. - } = &self.lower_ctx.sigs().get_ret(abi, i) - { - if purpose == ArgumentPurpose::StructReturn { - continue; - } - for slot in slots { - match slot { - &ABIArgSlot::Reg { reg, ty, .. } => { - let value_regs = self.lower_ctx.alloc_tmp(ty); - defs.push(CallRetPair { - vreg: value_regs.only_reg().unwrap(), - preg: reg.into(), - }); - } - _ => {} - } - } - } - } - defs - } - - fn defs_lookup(&mut self, defs: &CallRetList, reg: RealReg) -> Reg { - let reg = Reg::from(reg); - for def in defs { - if def.preg == reg { - return def.vreg.to_reg(); - } - } - unreachable!() - } - fn abi_sig(&mut self, sig_ref: SigRef) -> Sig { self.lower_ctx.sigs().abi_sig_for_sig_ref(sig_ref) } - fn abi_first_ret(&mut self, sig_ref: SigRef, abi: Sig) -> usize { - // Return the index of the first actual return value, excluding - // any StructReturn that might have been added to Sig. - let sig = &self.lower_ctx.dfg().signatures[sig_ref]; - self.lower_ctx.sigs().num_rets(abi) - sig.returns.len() - } - fn abi_lane_order(&mut self, abi: Sig) -> LaneOrder { - lane_order_for_call_conv(self.lower_ctx.sigs()[abi].call_conv()) + LaneOrder::from(self.lower_ctx.sigs()[abi].call_conv()) } fn abi_call_stack_args(&mut self, abi: Sig) -> MemArg { @@ -218,7 +172,9 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { self.lower_ctx .abi_mut() .accumulate_outgoing_args_size(arg_space + ret_space); - MemArg::reg_plus_off(stack_reg(), arg_space.into(), MemFlags::trusted()) + MemArg::NominalSPOffset { + off: arg_space.into(), + } } else { // Tail-call ABI: buffer for outgoing return values is at the // bottom of the caller's frame (above the register save area). @@ -259,51 +215,97 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { MemArg::InitialSPOffset { off: 0 } } - fn abi_call_info( + fn abi_call_site_info( &mut self, abi: Sig, - dest: ExternalName, + dest: &CallInstDest, uses: &CallArgList, - defs: &CallRetList, - ) -> BoxCallInfo { - Box::new(self.abi_call_info_no_dest(abi, uses, defs).map(|()| dest)) - } - - fn abi_call_ind_info( - &mut self, - abi: Sig, - dest: Reg, - uses: &CallArgList, - defs: &CallRetList, - ) -> BoxCallIndInfo { - Box::new(self.abi_call_info_no_dest(abi, uses, defs).map(|()| dest)) - } + ) -> CallSiteInfo { + // Determine return buffer address. + let ret_area_base = match &self.abi_call_stack_rets(abi) { + &MemArg::NominalSPOffset { off } => off, + _ => unreachable!(), + }; + // Helper routine to compute the type after argument extension. + let ext_ty = |ty, extension| match (ty, extension) { + (ty, ArgumentExtension::None) => ty, + (I8, _) => I64, + (I16, _) => I64, + (I32, _) => I64, + _ => ty, + }; + // Allocate writable registers for all retval regs, except for StructRet args. + let mut defs: CallRetList = smallvec![]; + let mut outputs = InstOutput::new(); + for i in 0..self.lower_ctx.sigs().num_rets(abi) { + if let &ABIArg::Slots { + ref slots, purpose, .. + } = &self.lower_ctx.sigs().get_ret(abi, i) + { + if purpose == ArgumentPurpose::StructReturn { + continue; + } + // Our ABI always uses a single slot. + debug_assert_eq!(slots.len(), 1); + match &slots[0] { + &ABIArgSlot::Reg { reg, ty, extension } => { + let ty = ext_ty(ty, extension); + let into_reg = self.lower_ctx.alloc_tmp(ty).only_reg().unwrap(); + defs.push(CallRetPair { + vreg: into_reg, + location: RetLocation::Reg(reg.into(), ty), + }); + outputs.push(ValueRegs::one(into_reg.to_reg())); + } + &ABIArgSlot::Stack { + offset, + ty, + extension, + } => { + let ty = ext_ty(ty, extension); + let into_reg = self.lower_ctx.alloc_tmp(ty).only_reg().unwrap(); + let amode = StackAMode::OutgoingArg(offset + ret_area_base); + defs.push(CallRetPair { + vreg: into_reg, + location: RetLocation::Stack(amode, ty), + }); + outputs.push(ValueRegs::one(into_reg.to_reg())); + } + } + } + } - fn abi_return_call_info( - &mut self, - abi: Sig, - name: ExternalName, - uses: &CallArgList, - ) -> BoxReturnCallInfo { let sig_data = &self.lower_ctx.sigs()[abi]; - let callee_pop_size = sig_data.sized_stack_arg_space() as u32; - Box::new(ReturnCallInfo { - dest: name.clone(), + // Get clobbers: all caller-saves. These may include return value + // regs, which we will remove from the clobber set later. + let clobbers = S390xMachineDeps::get_regs_clobbered_by_call(sig_data.call_conv()); + let callee_pop_size = if sig_data.call_conv() == CallConv::Tail { + sig_data.sized_stack_arg_space() as u32 + } else { + 0 + }; + let info = Box::new(CallInfo { + dest: dest.clone(), uses: uses.clone(), + defs: defs, + clobbers, callee_pop_size, - }) + caller_conv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + callee_conv: self.lower_ctx.sigs()[abi].call_conv(), + }); + (info, outputs) } - fn abi_return_call_ind_info( + fn abi_return_call_info( &mut self, abi: Sig, - target: Reg, + dest: &CallInstDest, uses: &CallArgList, - ) -> BoxReturnCallIndInfo { + ) -> BoxReturnCallInfo { let sig_data = &self.lower_ctx.sigs()[abi]; let callee_pop_size = sig_data.sized_stack_arg_space() as u32; Box::new(ReturnCallInfo { - dest: target, + dest: dest.clone(), uses: uses.clone(), callee_pop_size, }) @@ -315,6 +317,11 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { .accumulate_outgoing_args_size(REG_SAVE_AREA_SIZE); } + #[inline] + fn call_site_info_split(&mut self, site: CallSiteInfo) -> (BoxCallInfo, InstOutput) { + site + } + #[inline] fn box_symbol_reloc(&mut self, symbol_reloc: &SymbolReloc) -> BoxSymbolReloc { Box::new(symbol_reloc.clone()) @@ -524,7 +531,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { #[inline] fn lane_order(&mut self) -> LaneOrder { - lane_order_for_call_conv(self.lower_ctx.abi().call_conv(self.lower_ctx.sigs())) + LaneOrder::from(self.lower_ctx.abi().call_conv(self.lower_ctx.sigs())) } #[inline] @@ -973,43 +980,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { } } -impl IsleContext<'_, '_, MInst, S390xBackend> { - fn abi_call_info_no_dest( - &mut self, - abi: Sig, - uses: &CallArgList, - defs: &CallRetList, - ) -> CallInfo<()> { - let sig_data = &self.lower_ctx.sigs()[abi]; - // Get clobbers: all caller-saves. These may include return value - // regs, which we will remove from the clobber set later. - let clobbers = S390xMachineDeps::get_regs_clobbered_by_call(sig_data.call_conv()); - let callee_pop_size = if sig_data.call_conv() == CallConv::Tail { - sig_data.sized_stack_arg_space() as u32 - } else { - 0 - }; - CallInfo { - dest: (), - uses: uses.clone(), - defs: defs.clone(), - clobbers, - callee_pop_size, - caller_conv: self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), - callee_conv: self.lower_ctx.sigs()[abi].call_conv(), - } - } -} - -/// Lane order to be used for a given calling convention. -#[inline] -fn lane_order_for_call_conv(call_conv: CallConv) -> LaneOrder { - match call_conv { - CallConv::Tail => LaneOrder::LittleEndian, - _ => LaneOrder::BigEndian, - } -} - /// Zero-extend the low `from_bits` bits of `value` to a full u64. #[inline] fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 18bbee0346..c1bbff6979 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -929,6 +929,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout { @@ -968,10 +969,18 @@ impl ABIMachineSpec for X64ABIMachineSpec { setup_area_size, clobber_size, fixed_frame_storage_size, + stackslots_size, outgoing_args_size, clobbered_callee_saves: regs, } } + + fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable { + // Use r11 as a temp: clobbered anyway, and + // not otherwise used as a return value in any of our + // supported calling conventions. + Writable::from_reg(regs::r11()) + } } impl X64CallSite { @@ -1126,7 +1135,8 @@ fn get_intreg_for_retval( 5 => Some(regs::r8()), 6 => Some(regs::r9()), 7 => Some(regs::r10()), - 8 => Some(regs::r11()), + // NB: `r11` is reserved as a scratch register that is + // also part of the clobber set. // NB: `r15` is reserved as a scratch register. _ => None, }, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b812fab48b..01f8e281e0 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1624,6 +1624,13 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. + call_info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::ReturnCallKnown { info: call_info } => { @@ -1706,6 +1713,13 @@ pub(crate) fn emit( ) .emit(sink, info, state); } + + // Load any stack-carried return values. + call_info.emit_retval_loads::( + state.frame_layout().stackslots_size, + |inst| inst.emit(sink, info, state), + |_space_needed| None, + ); } Inst::Args { .. } => {} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 033e96c8f9..5ee15133c2 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2445,8 +2445,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } @@ -2472,8 +2475,11 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); + for CallRetPair { vreg, location } in defs { + match location { + RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg), + RetLocation::Stack(..) => collector.any_def(vreg), + } } collector.reg_clobbers(*clobbers); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index eea24464a4..d8b0d4ce9e 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -179,20 +179,14 @@ fn emit_vm_call( abi.gen_arg(ctx, i, ValueRegs::one(*input)); } - let mut retval_insts: SmallInstVec<_> = smallvec![]; let mut outputs: SmallVec<[_; 1]> = smallvec![]; for i in 0..ctx.sigs().num_rets(ctx.sigs().abi_sig_for_signature(&sig)) { - let (retval_inst, retval_regs) = abi.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = abi.gen_retval(ctx, i); outputs.push(retval_regs.only_reg().unwrap()); } abi.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - Ok(outputs) } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index fbe14d6c2c..c18c528405 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -282,6 +282,20 @@ pub enum StackAMode { OutgoingArg(i64), } +impl StackAMode { + fn offset_by(&self, offset: u32) -> Self { + match self { + StackAMode::IncomingArg(off, size) => { + StackAMode::IncomingArg(off.checked_add(i64::from(offset)).unwrap(), *size) + } + StackAMode::Slot(off) => StackAMode::Slot(off.checked_add(i64::from(offset)).unwrap()), + StackAMode::OutgoingArg(off) => { + StackAMode::OutgoingArg(off.checked_add(i64::from(offset)).unwrap()) + } + } + } +} + /// Trait implemented by machine-specific backend to represent ISA flags. pub trait IsaFlags: Clone { /// Get a flag indicating whether forward-edge CFI is enabled. @@ -479,6 +493,7 @@ pub trait ABIMachineSpec { is_leaf: bool, incoming_args_size: u32, tail_args_size: u32, + stackslots_size: u32, fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> FrameLayout; @@ -578,6 +593,12 @@ pub trait ABIMachineSpec { call_conv: isa::CallConv, specified: ir::ArgumentExtension, ) -> ir::ArgumentExtension; + + /// Get a temporary register that is available to use after a call + /// completes and that does not interfere with register-carried + /// return values. This is used to move stack-carried return + /// values directly into spillslots if needed. + fn retval_temp_reg(call_conv_of_callee: isa::CallConv) -> Writable; } /// Out-of-line data for calls, to keep the size of `Inst` down. @@ -1018,6 +1039,9 @@ pub struct FrameLayout { /// This contains stack slots and spill slots. pub fixed_frame_storage_size: u32, + /// The size of all stackslots. + pub stackslots_size: u32, + /// Stack size to be reserved for outgoing arguments, if used by /// the current ABI, or 0 otherwise. After gen_clobber_save and /// before gen_clobber_restore, the stack pointer points to the @@ -1760,6 +1784,7 @@ impl Callee { self.is_leaf, self.stack_args_size(sigs), self.tail_args_size, + self.stackslots_size, total_stacksize, self.outgoing_args_size, )); @@ -1962,13 +1987,23 @@ pub struct CallArgPair { } /// An output return value from a call instruction: the vreg that is -/// defined, and the preg it is constrained to (per the ABI). +/// defined, and the preg or stack location it is constrained to (per +/// the ABI). #[derive(Clone, Debug)] pub struct CallRetPair { /// The virtual register to define from this return value. pub vreg: Writable, /// The real register from which the return value is read. - pub preg: Reg, + pub location: RetLocation, +} + +/// A location to load a return-value from after a call completes. +#[derive(Clone, Debug)] +pub enum RetLocation { + /// A physical register. + Reg(Reg, Type), + /// A stack location, identified by a `StackAMode`. + Stack(StackAMode, Type), } pub type CallArgList = SmallVec<[CallArgPair; 8]>; @@ -2297,12 +2332,7 @@ impl CallSite { } /// Define a return value after the call returns. - pub fn gen_retval( - &mut self, - ctx: &mut Lower, - idx: usize, - ) -> (SmallInstVec, ValueRegs) { - let mut insts = smallvec![]; + pub fn gen_retval(&mut self, ctx: &mut Lower, idx: usize) -> ValueRegs { let mut into_regs: SmallVec<[Reg; 2]> = smallvec![]; let ret = ctx.sigs().rets(self.sig)[idx].clone(); match ret { @@ -2315,7 +2345,7 @@ impl CallSite { let into_reg = ctx.alloc_tmp(ty).only_reg().unwrap(); self.defs.push(CallRetPair { vreg: into_reg, - preg: reg.into(), + location: RetLocation::Reg(reg.into(), ty), }); into_regs.push(into_reg.to_reg()); } @@ -2326,11 +2356,11 @@ impl CallSite { // ensuring that the return values will be in a consistent place after // any call. let ret_area_base = sig_data.sized_stack_arg_space(); - insts.push(M::gen_load_stack( - StackAMode::OutgoingArg(offset + ret_area_base), - into_reg, - ty, - )); + let amode = StackAMode::OutgoingArg(offset + ret_area_base); + self.defs.push(CallRetPair { + vreg: into_reg, + location: RetLocation::Stack(amode, ty), + }); into_regs.push(into_reg.to_reg()); } } @@ -2349,7 +2379,7 @@ impl CallSite { [a, b] => ValueRegs::two(a, b), _ => panic!("Expected to see one or two slots only from {ret:?}"), }; - (insts, value_regs) + value_regs } /// Emit the call itself. @@ -2386,7 +2416,9 @@ impl CallSite { // Remove retval regs from clobbers. for def in &defs { - clobbers.remove(PReg::from(def.preg.to_real_reg().unwrap())); + if let RetLocation::Reg(preg, ..) = def.location { + clobbers.remove(PReg::from(preg.to_real_reg().unwrap())); + } } clobbers @@ -2438,6 +2470,87 @@ impl CallSite { } } +impl CallInfo { + /// Emit loads for any stack-carried return values using the call + /// info and allocations. + pub fn emit_retval_loads< + M: ABIMachineSpec, + EmitFn: FnMut(M::I), + IslandFn: Fn(u32) -> Option, + >( + &self, + stackslots_size: u32, + mut emit: EmitFn, + emit_island: IslandFn, + ) { + // Count stack-ret locations and emit an island to account for + // this space usage. + let mut space_needed = 0; + for CallRetPair { location, .. } in &self.defs { + if let RetLocation::Stack(..) = location { + // Assume up to ten instructions, semi-arbitrarily: + // load from stack, store to spillslot, codegen of + // large offsets on RISC ISAs. + space_needed += 10 * M::I::worst_case_size(); + } + } + if space_needed > 0 { + if let Some(island_inst) = emit_island(space_needed) { + emit(island_inst); + } + } + + let temp = M::retval_temp_reg(self.callee_conv); + // The temporary must be noted as clobbered. + debug_assert!(M::get_regs_clobbered_by_call(self.callee_conv) + .contains(PReg::from(temp.to_reg().to_real_reg().unwrap()))); + + for CallRetPair { vreg, location } in &self.defs { + match location { + RetLocation::Reg(preg, ..) => { + // The temporary must not also be an actual return + // value register. + debug_assert!(*preg != temp.to_reg()); + } + RetLocation::Stack(amode, ty) => { + if let Some(spillslot) = vreg.to_reg().to_spillslot() { + // `temp` is an integer register of machine word + // width, but `ty` may be floating-point/vector, + // which (i) may not be loadable directly into an + // int reg, and (ii) may be wider than a machine + // word. For simplicity, and because there are not + // always easy choices for volatile float/vec regs + // (see e.g. x86-64, where fastcall clobbers only + // xmm0-xmm5, but tail uses xmm0-xmm7 for + // returns), we use the integer temp register in + // steps. + let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes(); + for part in 0..parts { + emit(M::gen_load_stack( + amode.offset_by(part * M::word_bytes()), + temp, + M::word_type(), + )); + emit(M::gen_store_stack( + StackAMode::Slot( + i64::from(stackslots_size) + + i64::from(M::word_bytes()) + * ((spillslot.index() as i64) + (part as i64)), + ), + temp.to_reg(), + M::word_type(), + )); + } + } else { + assert_ne!(*vreg, temp); + emit(M::gen_load_stack(*amode, *vreg, *ty)); + } + } + } + } + } +} + #[cfg(test)] mod tests { use super::SigData; diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 8875097f1c..8066d89d77 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -889,26 +889,19 @@ pub fn gen_call_common( gen_call_common_args(ctx, &mut caller, args); // Handle retvals prior to emitting call, so the - // constraints are on the call instruction; but buffer the - // instructions till after the call. + // constraints are on the call instruction. let mut outputs = InstOutput::new(); - let mut retval_insts = crate::machinst::abi::SmallInstVec::new(); // We take the *last* `num_rets` returns of the sig: // this skips a StructReturn, if any, that is present. let sigdata_num_rets = caller.num_rets(ctx.sigs()); debug_assert!(num_rets <= sigdata_num_rets); for i in (sigdata_num_rets - num_rets)..sigdata_num_rets { - let (retval_inst, retval_regs) = caller.gen_retval(ctx, i); - retval_insts.extend(retval_inst.into_iter()); + let retval_regs = caller.gen_retval(ctx, i); outputs.push(retval_regs); } caller.emit_call(ctx); - for inst in retval_insts { - ctx.emit(inst); - } - outputs } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 8ffc68b4e8..fce309471a 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -112,7 +112,8 @@ pub trait MachInst: Clone + Debug { /// Is this an "args" pseudoinst? fn is_args(&self) -> bool; - /// Should this instruction be included in the clobber-set? + /// Should this instruction's clobber-list be included in the + /// clobber-set? fn is_included_in_clobbers(&self) -> bool; /// Does this instruction access memory? diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 2670e3ad12..5a6ea1f9b3 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -38,27 +38,41 @@ pub fn first_user_vreg_index() -> usize { PINNED_VREGS } -/// A register named in an instruction. This register can be either a -/// virtual register or a fixed physical register. It does not have -/// any constraints applied to it: those can be added later in -/// `MachInst::get_operands()` when the `Reg`s are converted to -/// `Operand`s. +/// A register named in an instruction. This register can be a virtual +/// register, a fixed physical register, or a named spillslot (after +/// regalloc). It does not have any constraints applied to it: those +/// can be added later in `MachInst::get_operands()` when the `Reg`s +/// are converted to `Operand`s. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Reg(VReg); +pub struct Reg(u32); + +const REG_SPILLSLOT_BIT: u32 = 0x8000_0000; +const REG_SPILLSLOT_MASK: u32 = !REG_SPILLSLOT_BIT; impl Reg { /// Get the physical register (`RealReg`), if this register is /// one. pub fn to_real_reg(self) -> Option { - pinned_vreg_to_preg(self.0).map(RealReg) + pinned_vreg_to_preg(self.0.into()).map(RealReg) } /// Get the virtual (non-physical) register, if this register is /// one. pub fn to_virtual_reg(self) -> Option { - if pinned_vreg_to_preg(self.0).is_none() { - Some(VirtualReg(self.0)) + if self.to_spillslot().is_some() { + None + } else if pinned_vreg_to_preg(self.0.into()).is_none() { + Some(VirtualReg(self.0.into())) + } else { + None + } + } + + /// Get the spillslot, if this register is one. + pub fn to_spillslot(self) -> Option { + if (self.0 & REG_SPILLSLOT_BIT) != 0 { + Some(SpillSlot::new((self.0 & REG_SPILLSLOT_MASK) as usize)) } else { None } @@ -66,7 +80,8 @@ impl Reg { /// Get the class of this register. pub fn class(self) -> RegClass { - self.0.class() + assert!(!self.to_spillslot().is_some()); + VReg::from(self.0).class() } /// Is this a real (physical) reg? @@ -78,12 +93,19 @@ impl Reg { pub fn is_virtual(self) -> bool { self.to_virtual_reg().is_some() } + + /// Is this a spillslot? + pub fn is_spillslot(self) -> bool { + self.to_spillslot().is_some() + } } impl std::fmt::Debug for Reg { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.0 == VReg::invalid() { + if VReg::from(self.0) == VReg::invalid() { write!(f, "") + } else if let Some(spillslot) = self.to_spillslot() { + write!(f, "{spillslot}") } else if let Some(rreg) = self.to_real_reg() { let preg: PReg = rreg.into(); write!(f, "{preg}") @@ -197,7 +219,7 @@ impl Writable { impl std::convert::From for Reg { fn from(vreg: regalloc2::VReg) -> Reg { - Reg(vreg) + Reg(vreg.bits() as u32) } } @@ -213,12 +235,12 @@ impl std::convert::From for regalloc2::VReg { /// registers also map to particular (special) VRegs, so this /// method can be used either on virtual or physical `Reg`s. fn from(reg: Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } impl std::convert::From<&Reg> for regalloc2::VReg { fn from(reg: &Reg) -> regalloc2::VReg { - reg.0 + reg.0.into() } } @@ -256,19 +278,25 @@ impl std::convert::From for Reg { impl std::convert::From for Reg { fn from(reg: RealReg) -> Reg { - Reg(reg.into()) + Reg(VReg::from(reg).bits() as u32) } } impl std::convert::From for Reg { fn from(reg: VirtualReg) -> Reg { - Reg(reg.0) + Reg(reg.0.bits() as u32) } } /// A spill slot. pub type SpillSlot = regalloc2::SpillSlot; +impl std::convert::From for Reg { + fn from(spillslot: regalloc2::SpillSlot) -> Reg { + Reg(REG_SPILLSLOT_BIT | spillslot.index() as u32) + } +} + /// A register class. Each register in the ISA has one class, and the /// classes are disjoint. Most modern ISAs will have just two classes: /// the integer/general-purpose registers (GPRs), and the float/vector @@ -429,6 +457,19 @@ pub trait OperandVisitorImpl: OperandVisitor { self.add_operand(reg, constraint, OperandKind::Def, OperandPos::Late); } } + + /// Add a def that can be allocated to either a register or a + /// spillslot, at the end of the instruction (`After` + /// position). Use only when this def will be written after all + /// uses are read. + fn any_def(&mut self, reg: &mut Writable>) { + self.add_operand( + reg.reg.as_mut(), + OperandConstraint::Any, + OperandKind::Def, + OperandPos::Late, + ); + } } impl OperandVisitorImpl for T {} @@ -441,9 +482,10 @@ impl<'a, F: Fn(VReg) -> VReg> OperandVisitor for OperandCollector<'a, F> { kind: OperandKind, pos: OperandPos, ) { - reg.0 = (self.renamer)(reg.0); + debug_assert!(!reg.is_spillslot()); + reg.0 = (self.renamer)(VReg::from(reg.0)).bits() as u32; self.operands - .push(Operand::new(reg.0, constraint, kind, pos)); + .push(Operand::new(VReg::from(reg.0), constraint, kind, pos)); } fn debug_assert_is_allocatable_preg(&self, reg: PReg, expected: bool) { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 45c58a78ae..dd6f48668b 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -672,16 +672,6 @@ impl VCode { } for (i, range) in self.operand_ranges.iter() { - // Skip this instruction if not "included in clobbers" as - // per the MachInst. (Some backends use this to implement - // ABI specifics; e.g., excluding calls of the same ABI as - // the current function from clobbers, because by - // definition everything clobbered by the call can be - // clobbered by this function without saving as well.) - if !self.insts[i].is_included_in_clobbers() { - continue; - } - let operands = &self.operands[range.clone()]; let allocs = ®alloc.allocs[range]; for (operand, alloc) in operands.iter().zip(allocs.iter()) { @@ -693,8 +683,28 @@ impl VCode { } // Also add explicitly-clobbered registers. - if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { - clobbered.union_from(inst_clobbered); + // + // Skip merging this instruction's clobber list if not + // "included in clobbers" as per the MachInst. (Some + // backends use this to implement ABI specifics; e.g., + // excluding calls of the same ABI as the current function + // from clobbers, because by definition everything + // clobbered by the call can be clobbered by this function + // without saving as well. + // + // This is important for a particular optimization: when + // some registers are "half-clobbered", e.g. vector/float + // registers on aarch64, we want them to be seen as + // clobbered by regalloc so it avoids carrying values + // across calls in these registers but not seen as + // clobbered by prologue generation here (because the + // actual half-clobber implied by the clobber list fits + // within the clobbers that we allow without + // clobber-saves). + if self.insts[i].is_included_in_clobbers() { + if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) { + clobbered.union_from(inst_clobbered); + } } } @@ -933,17 +943,19 @@ impl VCode { let mut allocs = regalloc.inst_allocs(iix).iter(); self.insts[iix.index()].get_operands( &mut |reg: &mut Reg, constraint, _kind, _pos| { - let alloc = allocs - .next() - .expect("enough allocations for all operands") - .as_reg() - .expect("only register allocations, not stack allocations") - .into(); - - if let OperandConstraint::FixedReg(rreg) = constraint { - debug_assert_eq!(Reg::from(rreg), alloc); + let alloc = + allocs.next().expect("enough allocations for all operands"); + + if let Some(alloc) = alloc.as_reg() { + let alloc: Reg = alloc.into(); + if let OperandConstraint::FixedReg(rreg) = constraint { + debug_assert_eq!(Reg::from(rreg), alloc); + } + *reg = alloc; + } else if let Some(alloc) = alloc.as_stack() { + let alloc: Reg = alloc.into(); + *reg = alloc; } - *reg = alloc; }, ); debug_assert!(allocs.next().is_none()); diff --git a/cranelift/control/Cargo.toml b/cranelift/control/Cargo.toml index 87ad5a81e0..9b05295190 100644 --- a/cranelift/control/Cargo.toml +++ b/cranelift/control/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-control" -version = "0.119.0" +version = "0.120.0" description = "White-box fuzz testing framework" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/entity/Cargo.toml b/cranelift/entity/Cargo.toml index bcc00959f4..5a14502294 100644 --- a/cranelift/entity/Cargo.toml +++ b/cranelift/entity/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-entity" -version = "0.119.0" +version = "0.120.0" description = "Data structures using entity references as mapping keys" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-entity" diff --git a/cranelift/filetests/filetests/isa/aarch64/return-call.clif b/cranelift/filetests/filetests/isa/aarch64/return-call.clif index 46a9ce746c..1e018b3c4a 100644 --- a/cranelift/filetests/filetests/isa/aarch64/return-call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/return-call.clif @@ -463,18 +463,18 @@ block2: ; stp x19, x20, [sp, #-16]! ; sub sp, sp, #16 ; block0: -; movz x3, #10 -; str x3, [sp] +; movz x14, #10 +; str x14, [sp] ; movz x3, #15 ; movz x4, #20 ; movz x5, #25 ; movz x6, #30 ; movz x7, #35 -; movz x25, #40 -; movz x21, #45 -; movz x28, #50 -; movz x27, #55 -; movz x26, #60 +; movz x21, #40 +; movz x28, #45 +; movz x27, #50 +; movz x26, #55 +; movz x25, #60 ; movz x24, #65 ; movz x23, #70 ; movz x22, #75 @@ -493,11 +493,11 @@ block2: ; cbnz x2, label2 ; b label1 ; block1: ; movz x2, #140 -; str x25, [sp, #112] -; str x21, [sp, #120] -; str x28, [sp, #128] -; str x27, [sp, #136] -; str x26, [sp, #144] +; str x21, [sp, #112] +; str x28, [sp, #120] +; str x27, [sp, #128] +; str x26, [sp, #136] +; str x25, [sp, #144] ; str x24, [sp, #152] ; str x23, [sp, #160] ; str x22, [sp, #168] @@ -519,11 +519,11 @@ block2: ; return_call_ind x1 new_stack_arg_size:176 x2=x2 x3=x3 x4=x4 x5=x5 x6=x6 x7=x7 ; block2: ; ldr x2, [sp] -; str x25, [sp, #128] -; str x21, [sp, #136] -; str x28, [sp, #144] -; str x27, [sp, #152] -; str x26, [sp, #160] +; str x21, [sp, #128] +; str x28, [sp, #136] +; str x27, [sp, #144] +; str x26, [sp, #152] +; str x25, [sp, #160] ; str x24, [sp, #168] ; str x23, [sp, #176] ; str x22, [sp, #184] @@ -557,18 +557,18 @@ block2: ; stp x19, x20, [sp, #-0x10]! ; sub sp, sp, #0x10 ; block1: ; offset 0x30 -; mov x3, #0xa -; stur x3, [sp] +; mov x14, #0xa +; stur x14, [sp] ; mov x3, #0xf ; mov x4, #0x14 ; mov x5, #0x19 ; mov x6, #0x1e ; mov x7, #0x23 -; mov x25, #0x28 -; mov x21, #0x2d -; mov x28, #0x32 -; mov x27, #0x37 -; mov x26, #0x3c +; mov x21, #0x28 +; mov x28, #0x2d +; mov x27, #0x32 +; mov x26, #0x37 +; mov x25, #0x3c ; mov x24, #0x41 ; mov x23, #0x46 ; mov x22, #0x4b @@ -587,11 +587,11 @@ block2: ; cbnz x2, #0x12c ; block2: ; offset 0xa0 ; mov x2, #0x8c -; stur x25, [sp, #0x70] -; stur x21, [sp, #0x78] -; stur x28, [sp, #0x80] -; stur x27, [sp, #0x88] -; stur x26, [sp, #0x90] +; stur x21, [sp, #0x70] +; stur x28, [sp, #0x78] +; stur x27, [sp, #0x80] +; stur x26, [sp, #0x88] +; stur x25, [sp, #0x90] ; stur x24, [sp, #0x98] ; stur x23, [sp, #0xa0] ; stur x22, [sp, #0xa8] @@ -623,11 +623,11 @@ block2: ; br x1 ; block3: ; offset 0x12c ; ldur x2, [sp] -; stur x25, [sp, #0x80] -; stur x21, [sp, #0x88] -; stur x28, [sp, #0x90] -; stur x27, [sp, #0x98] -; stur x26, [sp, #0xa0] +; stur x21, [sp, #0x80] +; stur x28, [sp, #0x88] +; stur x27, [sp, #0x90] +; stur x26, [sp, #0x98] +; stur x25, [sp, #0xa0] ; stur x24, [sp, #0xa8] ; stur x23, [sp, #0xb0] ; stur x22, [sp, #0xb8] diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif index 41045f0acb..9ba81c4ec6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-conv.clif @@ -380,32 +380,23 @@ block0: ; VCode: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #160 +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; sub sp, sp, #240 ; block0: ; mov x8, sp ; load_ext_name x12, TestCase(%tail_callee_stack_rets)+0 ; blr x12 -; ldr x9, [sp] -; ldr x11, [sp, #8] -; ldr x13, [sp, #16] -; ldr x15, [sp, #24] -; ldr x1, [sp, #32] -; ldr x3, [sp, #40] -; ldr x5, [sp, #48] -; ldr x7, [sp, #56] -; ldr x9, [sp, #64] -; ldr x11, [sp, #72] -; ldr x13, [sp, #80] -; ldr x15, [sp, #88] -; ldr x1, [sp, #96] -; ldr x3, [sp, #104] -; ldr x5, [sp, #112] -; ldr x7, [sp, #120] -; ldr x9, [sp, #128] -; ldr x11, [sp, #136] -; ldr x13, [sp, #144] -; ldr x2, [sp, #152] -; add sp, sp, #160 +; ldr x2, [sp, #232] +; add sp, sp, #240 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret ; @@ -413,35 +404,56 @@ block0: ; block0: ; offset 0x0 ; stp x29, x30, [sp, #-0x10]! ; mov x29, sp -; sub sp, sp, #0xa0 -; block1: ; offset 0xc +; stp x27, x28, [sp, #-0x10]! +; stp x25, x26, [sp, #-0x10]! +; stp x23, x24, [sp, #-0x10]! +; stp x21, x22, [sp, #-0x10]! +; stp x19, x20, [sp, #-0x10]! +; sub sp, sp, #0xf0 +; block1: ; offset 0x20 ; mov x8, sp -; ldr x12, #0x18 -; b #0x20 +; ldr x12, #0x2c +; b #0x34 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x12 ; ldur x9, [sp] -; ldur x11, [sp, #8] -; ldur x13, [sp, #0x10] -; ldur x15, [sp, #0x18] -; ldur x1, [sp, #0x20] -; ldur x3, [sp, #0x28] -; ldur x5, [sp, #0x30] -; ldur x7, [sp, #0x38] +; stur x9, [sp, #0xa0] +; ldur x9, [sp, #8] +; stur x9, [sp, #0xa8] +; ldur x9, [sp, #0x10] +; stur x9, [sp, #0xb0] +; ldur x9, [sp, #0x18] +; stur x9, [sp, #0xb8] +; ldur x9, [sp, #0x20] +; stur x9, [sp, #0xc0] +; ldur x9, [sp, #0x28] +; stur x9, [sp, #0xc8] +; ldur x9, [sp, #0x30] +; stur x9, [sp, #0xd0] +; ldur x9, [sp, #0x38] +; stur x9, [sp, #0xd8] ; ldur x9, [sp, #0x40] -; ldur x11, [sp, #0x48] -; ldur x13, [sp, #0x50] -; ldur x15, [sp, #0x58] -; ldur x1, [sp, #0x60] -; ldur x3, [sp, #0x68] -; ldur x5, [sp, #0x70] -; ldur x7, [sp, #0x78] -; ldur x9, [sp, #0x80] -; ldur x11, [sp, #0x88] -; ldur x13, [sp, #0x90] -; ldur x2, [sp, #0x98] -; add sp, sp, #0xa0 +; stur x9, [sp, #0xe0] +; ldur x25, [sp, #0x48] +; ldur x26, [sp, #0x50] +; ldur x27, [sp, #0x58] +; ldur x28, [sp, #0x60] +; ldur x21, [sp, #0x68] +; ldur x19, [sp, #0x70] +; ldur x20, [sp, #0x78] +; ldur x22, [sp, #0x80] +; ldur x23, [sp, #0x88] +; ldur x24, [sp, #0x90] +; ldur x9, [sp, #0x98] +; stur x9, [sp, #0xe8] +; ldur x2, [sp, #0xe8] +; add sp, sp, #0xf0 +; ldp x19, x20, [sp], #0x10 +; ldp x21, x22, [sp], #0x10 +; ldp x23, x24, [sp], #0x10 +; ldp x25, x26, [sp], #0x10 +; ldp x27, x28, [sp], #0x10 ; ldp x29, x30, [sp], #0x10 ; ret @@ -620,7 +632,7 @@ block0: ; stp x23, x24, [sp, #-16]! ; stp x21, x22, [sp, #-16]! ; stp x19, x20, [sp, #-16]! -; sub sp, sp, #320 +; sub sp, sp, #400 ; block0: ; movz x2, #10 ; movz x3, #15 @@ -671,27 +683,8 @@ block0: ; add x8, sp, #160 ; load_ext_name x10, TestCase(%tail_callee_stack_args_and_rets)+0 ; blr x10 -; ldr x7, [sp, #160] -; ldr x9, [sp, #168] -; ldr x11, [sp, #176] -; ldr x13, [sp, #184] -; ldr x15, [sp, #192] -; ldr x1, [sp, #200] -; ldr x3, [sp, #208] -; ldr x5, [sp, #216] -; ldr x7, [sp, #224] -; ldr x9, [sp, #232] -; ldr x11, [sp, #240] -; ldr x13, [sp, #248] -; ldr x15, [sp, #256] -; ldr x1, [sp, #264] -; ldr x3, [sp, #272] -; ldr x5, [sp, #280] -; ldr x7, [sp, #288] -; ldr x9, [sp, #296] -; ldr x11, [sp, #304] -; ldr x2, [sp, #312] -; add sp, sp, #320 +; ldr x2, [sp, #392] +; add sp, sp, #400 ; ldp x19, x20, [sp], #16 ; ldp x21, x22, [sp], #16 ; ldp x23, x24, [sp], #16 @@ -709,7 +702,7 @@ block0: ; stp x23, x24, [sp, #-0x10]! ; stp x21, x22, [sp, #-0x10]! ; stp x19, x20, [sp, #-0x10]! -; sub sp, sp, #0x140 +; sub sp, sp, #0x190 ; block1: ; offset 0x20 ; mov x2, #0xa ; mov x3, #0xf @@ -764,27 +757,38 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x10 ; sub sp, sp, #0xa0 -; ldur x7, [sp, #0xa0] +; ldur x9, [sp, #0xa0] +; str x9, [sp, #0x140] ; ldur x9, [sp, #0xa8] -; ldur x11, [sp, #0xb0] -; ldur x13, [sp, #0xb8] -; ldur x15, [sp, #0xc0] -; ldur x1, [sp, #0xc8] -; ldur x3, [sp, #0xd0] -; ldur x5, [sp, #0xd8] -; ldur x7, [sp, #0xe0] -; ldur x9, [sp, #0xe8] -; ldur x11, [sp, #0xf0] -; ldur x13, [sp, #0xf8] -; ldr x15, [sp, #0x100] -; ldr x1, [sp, #0x108] -; ldr x3, [sp, #0x110] -; ldr x5, [sp, #0x118] -; ldr x7, [sp, #0x120] -; ldr x9, [sp, #0x128] -; ldr x11, [sp, #0x130] -; ldr x2, [sp, #0x138] -; add sp, sp, #0x140 +; str x9, [sp, #0x148] +; ldur x9, [sp, #0xb0] +; str x9, [sp, #0x150] +; ldur x9, [sp, #0xb8] +; str x9, [sp, #0x158] +; ldur x9, [sp, #0xc0] +; str x9, [sp, #0x160] +; ldur x9, [sp, #0xc8] +; str x9, [sp, #0x168] +; ldur x9, [sp, #0xd0] +; str x9, [sp, #0x170] +; ldur x9, [sp, #0xd8] +; str x9, [sp, #0x178] +; ldur x9, [sp, #0xe0] +; str x9, [sp, #0x180] +; ldur x25, [sp, #0xe8] +; ldur x26, [sp, #0xf0] +; ldur x27, [sp, #0xf8] +; ldr x28, [sp, #0x100] +; ldr x21, [sp, #0x108] +; ldr x19, [sp, #0x110] +; ldr x20, [sp, #0x118] +; ldr x22, [sp, #0x120] +; ldr x23, [sp, #0x128] +; ldr x24, [sp, #0x130] +; ldr x9, [sp, #0x138] +; str x9, [sp, #0x188] +; ldr x2, [sp, #0x188] +; add sp, sp, #0x190 ; ldp x19, x20, [sp], #0x10 ; ldp x21, x22, [sp], #0x10 ; ldp x23, x24, [sp], #0x10 diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6231d59c0d..a2fa6dd632 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I32) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i, types::I64) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i, types::I64) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned -; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i, types::I64) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i, types::I64) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i, types::I64) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i, types::I64) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i, types::I64) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i, types::I64) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i, types::I64) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i, types::I64) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i, types::I64) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i, types::I64) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i, types::I64) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i, types::I64) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i, types::I64) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i32) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index bde96cabe9..64044b0186 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -16,7 +16,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -43,7 +43,7 @@ block0: ; push_frame ; block0: ; xzero x2 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I32) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xone x0 ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i, types::I64) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i, types::I64) }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -214,80 +214,71 @@ block0: } ; VCode: -; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; push_frame_save 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; block0: ; x12 = load_addr OutgoingArg(0) -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } -; xmov x21, x12 -; x28 = xload64 OutgoingArg(0) // flags = notrap aligned -; x16 = xload64 OutgoingArg(8) // flags = notrap aligned -; x12 = xload64 OutgoingArg(16) // flags = notrap aligned -; x15 = xload64 OutgoingArg(24) // flags = notrap aligned -; x22 = xload64 OutgoingArg(32) // flags = notrap aligned -; x24 = xload64 OutgoingArg(40) // flags = notrap aligned -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28} +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }, CallRetPair { vreg: Writable { reg: p2i }, location: Reg(p2i, types::I64) }, CallRetPair { vreg: Writable { reg: p3i }, location: Reg(p3i, types::I64) }, CallRetPair { vreg: Writable { reg: p4i }, location: Reg(p4i, types::I64) }, CallRetPair { vreg: Writable { reg: p5i }, location: Reg(p5i, types::I64) }, CallRetPair { vreg: Writable { reg: p6i }, location: Reg(p6i, types::I64) }, CallRetPair { vreg: Writable { reg: p7i }, location: Reg(p7i, types::I64) }, CallRetPair { vreg: Writable { reg: p8i }, location: Reg(p8i, types::I64) }, CallRetPair { vreg: Writable { reg: p9i }, location: Reg(p9i, types::I64) }, CallRetPair { vreg: Writable { reg: p10i }, location: Reg(p10i, types::I64) }, CallRetPair { vreg: Writable { reg: p11i }, location: Reg(p11i, types::I64) }, CallRetPair { vreg: Writable { reg: p12i }, location: Reg(p12i, types::I64) }, CallRetPair { vreg: Writable { reg: p13i }, location: Reg(p13i, types::I64) }, CallRetPair { vreg: Writable { reg: p14i }, location: Reg(p14i, types::I64) }, CallRetPair { vreg: Writable { reg: p27i }, location: Stack(OutgoingArg(0), types::I64) }, CallRetPair { vreg: Writable { reg: p19i }, location: Stack(OutgoingArg(8), types::I64) }, CallRetPair { vreg: Writable { reg: p29i }, location: Stack(OutgoingArg(16), types::I64) }, CallRetPair { vreg: Writable { reg: p16i }, location: Stack(OutgoingArg(24), types::I64) }, CallRetPair { vreg: Writable { reg: p17i }, location: Stack(OutgoingArg(32), types::I64) }, CallRetPair { vreg: Writable { reg: p18i }, location: Stack(OutgoingArg(40), types::I64) }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, {x16, x17, x18, x19, x26, x27, x28, x29} ; ret ; ; Disassembled: -; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28 +; push_frame_save 112, x16, x17, x18, x19, x26, x27, x28, x29 ; xmov x12, sp ; call1 x12, 0x0 // target = 0x8 -; xmov x21, x12 -; xload64le_o32 x28, sp, 0 -; xload64le_o32 x16, sp, 8 -; xload64le_o32 x12, sp, 16 -; xload64le_o32 x15, sp, 24 -; xload64le_o32 x22, sp, 32 -; xload64le_o32 x24, sp, 40 -; xadd64 x20, x0, x1 -; xadd64 x19, x2, x3 -; xadd64 x18, x4, x5 -; xadd64 x4, x6, x7 -; xadd64 x5, x8, x9 -; xadd64 x2, x10, x11 -; xmov x11, x21 -; xadd64 x3, x11, x13 -; xadd64 x14, x14, x28 -; xadd64 x0, x16, x12 -; xadd64 x15, x12, x15 -; xadd64 x1, x22, x24 -; xadd64 x6, x20, x19 -; xadd64 x4, x18, x4 -; xadd64 x2, x5, x2 -; xadd64 x14, x3, x14 -; xadd64 x15, x0, x15 -; xadd64 x0, x1, x1 -; xadd64 x1, x6, x4 -; xadd64 x14, x2, x14 -; xadd64 x15, x15, x0 -; xadd64 x14, x1, x14 -; xadd64 x15, x15, x15 -; xadd64 x0, x14, x15 -; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28 +; jump 0x5 // target = 0x13 +; xload64le_o32 x27, sp, 0 +; xload64le_o32 x19, sp, 8 +; xload64le_o32 x29, sp, 16 +; xload64le_o32 x16, sp, 24 +; xload64le_o32 x17, sp, 32 +; xload64le_o32 x18, sp, 40 +; xadd64 x26, x0, x1 +; xadd64 x28, x2, x3 +; xadd64 x2, x4, x5 +; xadd64 x15, x6, x7 +; xadd64 x1, x8, x9 +; xadd64 x3, x10, x11 +; xadd64 x4, x12, x13 +; xadd64 x12, x14, x27 +; xadd64 x13, x19, x29 +; xadd64 x9, x29, x16 +; xadd64 x10, x17, x18 +; xadd64 x8, x26, x28 +; xadd64 x11, x2, x15 +; xadd64 x14, x1, x3 +; xadd64 x12, x4, x12 +; xadd64 x9, x13, x9 +; xadd64 x10, x10, x10 +; xadd64 x8, x8, x11 +; xadd64 x11, x14, x12 +; xadd64 x9, x9, x10 +; xadd64 x8, x8, x11 +; xadd64 x9, x9, x9 +; xadd64 x0, x8, x9 +; pop_frame_restore 112, x16, x17, x18, x19, x26, x27, x28, x29 ; ret function %call_indirect(i64) -> i64 { @@ -301,7 +292,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } +; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -397,7 +388,7 @@ block0(v0: i32): ; xstore64 sp+1000008, x20 // flags = notrap aligned ; block0: ; xmov x20, x0 -; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I32) }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x5, x20 ; xadd32 x0, x5, x0 ; x20 = xload64 sp+1000008 // flags = notrap aligned diff --git a/cranelift/filetests/filetests/isa/riscv64/return-call.clif b/cranelift/filetests/filetests/isa/riscv64/return-call.clif index b472a6c31c..2a47ad837a 100644 --- a/cranelift/filetests/filetests/isa/riscv64/return-call.clif +++ b/cranelift/filetests/filetests/isa/riscv64/return-call.clif @@ -561,10 +561,10 @@ block2: ; block0: ; li a1,10 ; sd a1,16(slot) -; li a2,15 -; sd a2,8(slot) -; li a3,20 -; sd a3,0(slot) +; li a1,15 +; sd a1,8(slot) +; li a2,20 +; sd a2,0(slot) ; li a3,25 ; li a4,30 ; li a5,35 @@ -572,11 +572,11 @@ block2: ; li a7,45 ; li a2,50 ; li a1,55 -; li s1,60 -; li s5,65 -; li s4,70 -; li s3,75 -; li s2,80 +; li s5,60 +; li s4,65 +; li s3,70 +; li s2,75 +; li s1,80 ; li t4,85 ; li t3,90 ; li t2,95 @@ -593,11 +593,11 @@ block2: ; li a0,140 ; sd a2,-160(incoming_arg) ; sd a1,-152(incoming_arg) -; sd s1,-144(incoming_arg) -; sd s5,-136(incoming_arg) -; sd s4,-128(incoming_arg) -; sd s3,-120(incoming_arg) -; sd s2,-112(incoming_arg) +; sd s5,-144(incoming_arg) +; sd s4,-136(incoming_arg) +; sd s3,-128(incoming_arg) +; sd s2,-120(incoming_arg) +; sd s1,-112(incoming_arg) ; sd t4,-104(incoming_arg) ; sd t3,-96(incoming_arg) ; sd t2,-88(incoming_arg) @@ -619,11 +619,11 @@ block2: ; ld a0,16(slot) ; sd a2,-144(incoming_arg) ; sd a1,-136(incoming_arg) -; sd s1,-128(incoming_arg) -; sd s5,-120(incoming_arg) -; sd s4,-112(incoming_arg) -; sd s3,-104(incoming_arg) -; sd s2,-96(incoming_arg) +; sd s5,-128(incoming_arg) +; sd s4,-120(incoming_arg) +; sd s3,-112(incoming_arg) +; sd s2,-104(incoming_arg) +; sd s1,-96(incoming_arg) ; sd t4,-88(incoming_arg) ; sd t3,-80(incoming_arg) ; sd t2,-72(incoming_arg) @@ -666,10 +666,10 @@ block2: ; block1: ; offset 0x54 ; addi a1, zero, 0xa ; sd a1, 0x10(sp) -; addi a2, zero, 0xf -; sd a2, 8(sp) -; addi a3, zero, 0x14 -; sd a3, 0(sp) +; addi a1, zero, 0xf +; sd a1, 8(sp) +; addi a2, zero, 0x14 +; sd a2, 0(sp) ; addi a3, zero, 0x19 ; addi a4, zero, 0x1e ; addi a5, zero, 0x23 @@ -677,11 +677,11 @@ block2: ; addi a7, zero, 0x2d ; addi a2, zero, 0x32 ; addi a1, zero, 0x37 -; addi s1, zero, 0x3c -; addi s5, zero, 0x41 -; addi s4, zero, 0x46 -; addi s3, zero, 0x4b -; addi s2, zero, 0x50 +; addi s5, zero, 0x3c +; addi s4, zero, 0x41 +; addi s3, zero, 0x46 +; addi s2, zero, 0x4b +; addi s1, zero, 0x50 ; addi t4, zero, 0x55 ; addi t3, zero, 0x5a ; addi t2, zero, 0x5f @@ -698,11 +698,11 @@ block2: ; addi a0, zero, 0x8c ; sd a2, 0x90(sp) ; sd a1, 0x98(sp) -; sd s1, 0xa0(sp) -; sd s5, 0xa8(sp) -; sd s4, 0xb0(sp) -; sd s3, 0xb8(sp) -; sd s2, 0xc0(sp) +; sd s5, 0xa0(sp) +; sd s4, 0xa8(sp) +; sd s3, 0xb0(sp) +; sd s2, 0xb8(sp) +; sd s1, 0xc0(sp) ; sd t4, 0xc8(sp) ; sd t3, 0xd0(sp) ; sd t2, 0xd8(sp) @@ -742,11 +742,11 @@ block2: ; ld a0, 0x10(sp) ; sd a2, 0xa0(sp) ; sd a1, 0xa8(sp) -; sd s1, 0xb0(sp) -; sd s5, 0xb8(sp) -; sd s4, 0xc0(sp) -; sd s3, 0xc8(sp) -; sd s2, 0xd0(sp) +; sd s5, 0xb0(sp) +; sd s4, 0xb8(sp) +; sd s3, 0xc0(sp) +; sd s2, 0xc8(sp) +; sd s1, 0xd0(sp) ; sd t4, 0xd8(sp) ; sd t3, 0xe0(sp) ; sd t2, 0xe8(sp) diff --git a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif index b276f88657..219c0e0184 100644 --- a/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/riscv64/tail-call-conv.clif @@ -475,36 +475,35 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-192 +; addi sp,sp,-400 +; sd s1,392(sp) +; sd s2,384(sp) +; sd s3,376(sp) +; sd s4,368(sp) +; sd s5,360(sp) +; sd s6,352(sp) +; sd s7,344(sp) +; sd s8,336(sp) +; sd s9,328(sp) +; sd s10,320(sp) +; sd s11,312(sp) ; block0: ; load_addr a0,0(sp) ; load_sym a4,%tail_callee_stack_rets+0 ; callind a4 -; ld a5,0(sp) -; ld a1,8(sp) -; ld a3,16(sp) -; ld a5,24(sp) -; ld a1,32(sp) -; ld a3,40(sp) -; ld a5,48(sp) -; ld a1,56(sp) -; ld a3,64(sp) -; ld a5,72(sp) -; ld a1,80(sp) -; ld a3,88(sp) -; ld a5,96(sp) -; ld a1,104(sp) -; ld a3,112(sp) -; ld a5,120(sp) -; ld a1,128(sp) -; ld a3,136(sp) -; ld a5,144(sp) -; ld a1,152(sp) -; ld a3,160(sp) -; ld a5,168(sp) -; ld a1,176(sp) -; ld a0,184(sp) -; addi sp,sp,192 +; ld a0,96(slot) +; ld s1,392(sp) +; ld s2,384(sp) +; ld s3,376(sp) +; ld s4,368(sp) +; ld s5,360(sp) +; ld s6,352(sp) +; ld s7,344(sp) +; ld s8,336(sp) +; ld s9,328(sp) +; ld s10,320(sp) +; ld s11,312(sp) +; addi sp,sp,400 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -516,8 +515,19 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0xc0 -; block1: ; offset 0x14 +; addi sp, sp, -0x190 +; sd s1, 0x188(sp) +; sd s2, 0x180(sp) +; sd s3, 0x178(sp) +; sd s4, 0x170(sp) +; sd s5, 0x168(sp) +; sd s6, 0x160(sp) +; sd s7, 0x158(sp) +; sd s8, 0x150(sp) +; sd s9, 0x148(sp) +; sd s10, 0x140(sp) +; sd s11, 0x138(sp) +; block1: ; offset 0x40 ; mv a0, sp ; auipc a4, 0 ; ld a4, 0xc(a4) @@ -525,31 +535,57 @@ block0: ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; jalr a4 -; ld a5, 0(sp) -; ld a1, 8(sp) -; ld a3, 0x10(sp) -; ld a5, 0x18(sp) -; ld a1, 0x20(sp) -; ld a3, 0x28(sp) -; ld a5, 0x30(sp) -; ld a1, 0x38(sp) -; ld a3, 0x40(sp) -; ld a5, 0x48(sp) -; ld a1, 0x50(sp) -; ld a3, 0x58(sp) -; ld a5, 0x60(sp) -; ld a1, 0x68(sp) -; ld a3, 0x70(sp) -; ld a5, 0x78(sp) -; ld a1, 0x80(sp) -; ld a3, 0x88(sp) -; ld a5, 0x90(sp) -; ld a1, 0x98(sp) -; ld a3, 0xa0(sp) -; ld a5, 0xa8(sp) -; ld a1, 0xb0(sp) -; ld a0, 0xb8(sp) -; addi sp, sp, 0xc0 +; j 4 +; ld a2, 0(sp) +; sd a2, 0xc0(sp) +; ld a2, 8(sp) +; sd a2, 0xc8(sp) +; ld a2, 0x10(sp) +; sd a2, 0xd0(sp) +; ld a2, 0x18(sp) +; sd a2, 0xd8(sp) +; ld a2, 0x20(sp) +; sd a2, 0xe0(sp) +; ld a2, 0x28(sp) +; sd a2, 0xe8(sp) +; ld a2, 0x30(sp) +; sd a2, 0xf0(sp) +; ld a2, 0x38(sp) +; sd a2, 0xf8(sp) +; ld a2, 0x40(sp) +; sd a2, 0x100(sp) +; ld a2, 0x48(sp) +; sd a2, 0x108(sp) +; ld a2, 0x50(sp) +; sd a2, 0x110(sp) +; ld a2, 0x58(sp) +; sd a2, 0x118(sp) +; ld s8, 0x60(sp) +; ld s9, 0x68(sp) +; ld s10, 0x70(sp) +; ld s11, 0x78(sp) +; ld s7, 0x80(sp) +; ld s6, 0x88(sp) +; ld s5, 0x90(sp) +; ld s4, 0x98(sp) +; ld s3, 0xa0(sp) +; ld s2, 0xa8(sp) +; ld s1, 0xb0(sp) +; ld a2, 0xb8(sp) +; sd a2, 0x120(sp) +; ld a0, 0x120(sp) +; ld s1, 0x188(sp) +; ld s2, 0x180(sp) +; ld s3, 0x178(sp) +; ld s4, 0x170(sp) +; ld s5, 0x168(sp) +; ld s6, 0x160(sp) +; ld s7, 0x158(sp) +; ld s8, 0x150(sp) +; ld s9, 0x148(sp) +; ld s10, 0x140(sp) +; ld s11, 0x138(sp) +; addi sp, sp, 0x190 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 @@ -773,23 +809,23 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; addi sp,sp,-464 -; sd s1,456(sp) -; sd s2,448(sp) -; sd s3,440(sp) -; sd s4,432(sp) -; sd s5,424(sp) -; sd s6,416(sp) -; sd s7,408(sp) -; sd s8,400(sp) -; sd s9,392(sp) -; sd s10,384(sp) -; sd s11,376(sp) +; addi sp,sp,-560 +; sd s1,552(sp) +; sd s2,544(sp) +; sd s3,536(sp) +; sd s4,528(sp) +; sd s5,520(sp) +; sd s6,512(sp) +; sd s7,504(sp) +; sd s8,496(sp) +; sd s9,488(sp) +; sd s10,480(sp) +; sd s11,472(sp) ; block0: ; li a2,10 -; sd a2,8(slot) +; sd a2,0(slot) ; li a3,15 -; sd a3,0(slot) +; sd a3,96(slot) ; li a3,20 ; li a4,25 ; li a5,30 @@ -835,45 +871,22 @@ block0: ; sd a2,144(sp) ; load_addr a0,160(sp) ; load_sym t1,%tail_callee_stack_args_and_rets+0 -; ld a1,8(slot) -; ld a2,0(slot) +; ld a1,0(slot) +; ld a2,96(slot) ; callind t1 -; ld a2,160(sp) -; ld a4,168(sp) -; ld a0,176(sp) -; ld a2,184(sp) -; ld a4,192(sp) -; ld a0,200(sp) -; ld a2,208(sp) -; ld a4,216(sp) -; ld a0,224(sp) -; ld a2,232(sp) -; ld a4,240(sp) -; ld a0,248(sp) -; ld a2,256(sp) -; ld a4,264(sp) -; ld a0,272(sp) -; ld a2,280(sp) -; ld a4,288(sp) -; ld a0,296(sp) -; ld a2,304(sp) -; ld a4,312(sp) -; ld a0,320(sp) -; ld a2,328(sp) -; ld a4,336(sp) -; ld a0,344(sp) -; ld s1,456(sp) -; ld s2,448(sp) -; ld s3,440(sp) -; ld s4,432(sp) -; ld s5,424(sp) -; ld s6,416(sp) -; ld s7,408(sp) -; ld s8,400(sp) -; ld s9,392(sp) -; ld s10,384(sp) -; ld s11,376(sp) -; addi sp,sp,464 +; ld a0,96(slot) +; ld s1,552(sp) +; ld s2,544(sp) +; ld s3,536(sp) +; ld s4,528(sp) +; ld s5,520(sp) +; ld s6,512(sp) +; ld s7,504(sp) +; ld s8,496(sp) +; ld s9,488(sp) +; ld s10,480(sp) +; ld s11,472(sp) +; addi sp,sp,560 ; ld ra,8(sp) ; ld fp,0(sp) ; addi sp,sp,16 @@ -885,23 +898,23 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; addi sp, sp, -0x1d0 -; sd s1, 0x1c8(sp) -; sd s2, 0x1c0(sp) -; sd s3, 0x1b8(sp) -; sd s4, 0x1b0(sp) -; sd s5, 0x1a8(sp) -; sd s6, 0x1a0(sp) -; sd s7, 0x198(sp) -; sd s8, 0x190(sp) -; sd s9, 0x188(sp) -; sd s10, 0x180(sp) -; sd s11, 0x178(sp) +; addi sp, sp, -0x230 +; sd s1, 0x228(sp) +; sd s2, 0x220(sp) +; sd s3, 0x218(sp) +; sd s4, 0x210(sp) +; sd s5, 0x208(sp) +; sd s6, 0x200(sp) +; sd s7, 0x1f8(sp) +; sd s8, 0x1f0(sp) +; sd s9, 0x1e8(sp) +; sd s10, 0x1e0(sp) +; sd s11, 0x1d8(sp) ; block1: ; offset 0x40 ; addi a2, zero, 0xa -; sd a2, 0x168(sp) +; sd a2, 0x160(sp) ; addi a3, zero, 0xf -; sd a3, 0x160(sp) +; sd a3, 0x1c0(sp) ; addi a3, zero, 0x14 ; addi a4, zero, 0x19 ; addi a5, zero, 0x1e @@ -951,46 +964,61 @@ block0: ; j 0xc ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 ; .byte 0x00, 0x00, 0x00, 0x00 -; ld a1, 0x168(sp) -; ld a2, 0x160(sp) +; ld a1, 0x160(sp) +; ld a2, 0x1c0(sp) ; jalr t1 ; addi sp, sp, -0xa0 +; j 4 ; ld a2, 0xa0(sp) -; ld a4, 0xa8(sp) -; ld a0, 0xb0(sp) +; sd a2, 0x160(sp) +; ld a2, 0xa8(sp) +; sd a2, 0x168(sp) +; ld a2, 0xb0(sp) +; sd a2, 0x170(sp) ; ld a2, 0xb8(sp) -; ld a4, 0xc0(sp) -; ld a0, 0xc8(sp) +; sd a2, 0x178(sp) +; ld a2, 0xc0(sp) +; sd a2, 0x180(sp) +; ld a2, 0xc8(sp) +; sd a2, 0x188(sp) ; ld a2, 0xd0(sp) -; ld a4, 0xd8(sp) -; ld a0, 0xe0(sp) +; sd a2, 0x190(sp) +; ld a2, 0xd8(sp) +; sd a2, 0x198(sp) +; ld a2, 0xe0(sp) +; sd a2, 0x1a0(sp) ; ld a2, 0xe8(sp) -; ld a4, 0xf0(sp) -; ld a0, 0xf8(sp) -; ld a2, 0x100(sp) -; ld a4, 0x108(sp) -; ld a0, 0x110(sp) -; ld a2, 0x118(sp) -; ld a4, 0x120(sp) -; ld a0, 0x128(sp) -; ld a2, 0x130(sp) -; ld a4, 0x138(sp) -; ld a0, 0x140(sp) -; ld a2, 0x148(sp) -; ld a4, 0x150(sp) -; ld a0, 0x158(sp) -; ld s1, 0x1c8(sp) -; ld s2, 0x1c0(sp) -; ld s3, 0x1b8(sp) -; ld s4, 0x1b0(sp) -; ld s5, 0x1a8(sp) -; ld s6, 0x1a0(sp) -; ld s7, 0x198(sp) -; ld s8, 0x190(sp) -; ld s9, 0x188(sp) -; ld s10, 0x180(sp) -; ld s11, 0x178(sp) -; addi sp, sp, 0x1d0 +; sd a2, 0x1a8(sp) +; ld a2, 0xf0(sp) +; sd a2, 0x1b0(sp) +; ld a2, 0xf8(sp) +; sd a2, 0x1b8(sp) +; ld s8, 0x100(sp) +; ld s9, 0x108(sp) +; ld s10, 0x110(sp) +; ld s11, 0x118(sp) +; ld s7, 0x120(sp) +; ld s6, 0x128(sp) +; ld s5, 0x130(sp) +; ld s4, 0x138(sp) +; ld s3, 0x140(sp) +; ld s2, 0x148(sp) +; ld s1, 0x150(sp) +; ld a2, 0x158(sp) +; sd a2, 0x1c0(sp) +; ld a0, 0x1c0(sp) +; ld s1, 0x228(sp) +; ld s2, 0x220(sp) +; ld s3, 0x218(sp) +; ld s4, 0x210(sp) +; ld s5, 0x208(sp) +; ld s6, 0x200(sp) +; ld s7, 0x1f8(sp) +; ld s8, 0x1f0(sp) +; ld s9, 0x1e8(sp) +; ld s10, 0x1e0(sp) +; ld s11, 0x1d8(sp) +; addi sp, sp, 0x230 ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10 diff --git a/cranelift/filetests/filetests/isa/s390x/call-tail.clif b/cranelift/filetests/filetests/isa/s390x/call-tail.clif index 1ce224b65c..2161965afb 100644 --- a/cranelift/filetests/filetests/isa/s390x/call-tail.clif +++ b/cranelift/filetests/filetests/isa/s390x/call-tail.clif @@ -17,8 +17,8 @@ block0(v0: i64): ; stmg %r14, %r15, 112(%r15) ; aghi %r15, -160 ; block0: -; bras %r1, 12 ; data %g + 0 ; lg %r5, 0(%r1) -; basr %r14, %r5 +; bras %r1, 12 ; data %g + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 ; lmg %r14, %r15, 272(%r15) ; br %r14 ; @@ -32,8 +32,8 @@ block0(v0: i64): ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r5, 0(%r1) -; basr %r14, %r5 +; lg %r4, 0(%r1) +; basr %r14, %r4 ; lmg %r14, %r15, 0x110(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index 13d6ae00f1..41f1a909d8 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -18,8 +18,8 @@ block0(v0: i64): ; stmg %r14, %r15, 112(%r15) ; aghi %r15, -160 ; block0: -; bras %r1, 12 ; data %g + 0 ; lg %r5, 0(%r1) -; basr %r14, %r5 +; bras %r1, 12 ; data %g + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 ; lmg %r14, %r15, 272(%r15) ; br %r14 ; @@ -33,8 +33,8 @@ block0(v0: i64): ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r5, 0(%r1) -; basr %r14, %r5 +; lg %r4, 0(%r1) +; basr %r14, %r4 ; lmg %r14, %r15, 0x110(%r15) ; br %r14 @@ -314,6 +314,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: } ; VCode: +; stmg %r6, %r15, 48(%r15) ; block0: ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) @@ -335,10 +336,13 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 48(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; block1: ; offset 0x6 ; vl %v1, 0(%r3) ; vl %v3, 0(%r4) ; vl %v5, 0(%r5) @@ -359,6 +363,7 @@ block0(v0: i128, v1: i128, v2: i128, v3: i128, v4: i128, v5: i128, v6: i128, v7: ; vaq %v5, %v6, %v7 ; vaq %v4, %v4, %v5 ; vst %v4, 0(%r2) +; lmg %r6, %r15, 0x30(%r15) ; br %r14 function %call_sret() -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/return-call-indirect.clif b/cranelift/filetests/filetests/isa/s390x/return-call-indirect.clif index eb4bf5f5cc..2474e07f5f 100644 --- a/cranelift/filetests/filetests/isa/s390x/return-call-indirect.clif +++ b/cranelift/filetests/filetests/isa/s390x/return-call-indirect.clif @@ -47,7 +47,7 @@ block0(v0: i64): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_i64 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -83,7 +83,7 @@ block0(v0: i64): ; stg %r1, 0(%r15) ; block0: ; larl %r4, %callee_i64 + 0 -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -148,7 +148,7 @@ block0(v0: f64): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_f64 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -218,7 +218,7 @@ block0(v0: i8): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_i8 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -357,7 +357,7 @@ block0: ; lg %r6, 240(%r15) ; lg %r7, 232(%r15) ; lg %r12, 160(%r15) -; return_call_ind %r12 ; callee_pop_size 320 +; lgr %r1, %r12 ; aghi %r15, 280 ; lmg %r8, %r14, 384(%r15) ; br %r1 ; callee_pop_size 320 ; ; Disassembled: ; block0: ; offset 0x0 diff --git a/cranelift/filetests/filetests/isa/s390x/return-call.clif b/cranelift/filetests/filetests/isa/s390x/return-call.clif index 4b86bc7f0e..34dbedab24 100644 --- a/cranelift/filetests/filetests/isa/s390x/return-call.clif +++ b/cranelift/filetests/filetests/isa/s390x/return-call.clif @@ -46,7 +46,7 @@ block0(v0: i64): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_i64 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -78,7 +78,7 @@ block0(v0: i64): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_i64_multiret + 0 ; lg %r5, 0(%r1) -; return_call_ind %r5 +; lmg %r14, %r15, 272(%r15) ; br %r5 ; ; Disassembled: ; block0: ; offset 0x0 @@ -112,13 +112,13 @@ block0(v0: i64): ; stg %r1, 0(%r15) ; block0: ; lgr %r9, %r2 -; bras %r1, 12 ; data %callee_i64 + 0 ; lg %r7, 0(%r1) +; bras %r1, 12 ; data %callee_i64 + 0 ; lg %r6, 0(%r1) ; lgr %r2, %r3 -; basr %r14, %r7 +; basr %r14, %r6 ; bras %r1, 12 ; data %callee_i64_multiret + 0 ; lg %r7, 0(%r1) ; lgr %r3, %r2 ; lgr %r2, %r9 -; return_call_ind %r7 +; lmg %r9, %r15, 232(%r15) ; br %r7 ; ; Disassembled: ; block0: ; offset 0x0 @@ -133,9 +133,9 @@ block0(v0: i64): ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r7, 0(%r1) +; lg %r6, 0(%r1) ; lgr %r2, %r3 -; basr %r14, %r7 +; basr %r14, %r6 ; bras %r1, 0x3c ; .byte 0x00, 0x00 ; reloc_external Abs8 %callee_i64_multiret 0 ; .byte 0x00, 0x00 @@ -162,7 +162,7 @@ block0(v0: i64): ; aghi %r15, -160 ; stg %r1, 0(%r15) ; block0: -; return_call %callee_i64 +; lmg %r14, %r15, 272(%r15) ; jg %callee_i64 ; ; Disassembled: ; block0: ; offset 0x0 @@ -224,7 +224,7 @@ block0(v0: f64): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_f64 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -292,7 +292,7 @@ block0(v0: i8): ; stg %r1, 0(%r15) ; block0: ; bras %r1, 12 ; data %callee_i8 + 0 ; lg %r4, 0(%r1) -; return_call_ind %r4 +; lmg %r14, %r15, 272(%r15) ; br %r4 ; ; Disassembled: ; block0: ; offset 0x0 @@ -363,7 +363,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v ; lgr %r5, %r9 ; lgr %r2, %r4 ; lgr %r4, %r14 -; return_call %one_stack_arg ; callee_pop_size 168 +; aghi %r15, 176 ; lmg %r9, %r14, 240(%r15) ; jg %one_stack_arg ; callee_pop_size 168 ; ; Disassembled: ; block0: ; offset 0x0 @@ -402,7 +402,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v ; stg %r1, 0(%r15) ; block0: ; llgc %r2, 343(%r15) -; return_call %callee_i8 +; aghi %r15, 344 ; lmg %r14, %r14, 112(%r15) ; jg %callee_i8 ; ; Disassembled: ; block0: ; offset 0x0 @@ -443,7 +443,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): ; lgr %r4, %r5 ; lgr %r5, %r6 ; lgr %r6, %r8 -; return_call %call_one_stack_arg ; callee_pop_size 184 +; aghi %r15, 160 ; lmg %r8, %r14, 248(%r15) ; jg %call_one_stack_arg ; callee_pop_size 184 ; ; Disassembled: ; block0: ; offset 0x0 @@ -614,7 +614,7 @@ block0: ; lg %r5, 240(%r15) ; lg %r6, 232(%r15) ; lg %r7, 224(%r15) -; return_call_ind %r8 ; callee_pop_size 320 +; lgr %r1, %r8 ; aghi %r15, 272 ; lmg %r8, %r14, 384(%r15) ; br %r1 ; callee_pop_size 320 ; ; Disassembled: ; block0: ; offset 0x0 diff --git a/cranelift/filetests/filetests/isa/s390x/vec-abi-128.clif b/cranelift/filetests/filetests/isa/s390x/vec-abi-128.clif index 9a2d47af3f..b206067da0 100644 --- a/cranelift/filetests/filetests/isa/s390x/vec-abi-128.clif +++ b/cranelift/filetests/filetests/isa/s390x/vec-abi-128.clif @@ -12,7 +12,7 @@ block0(v0: i128): ; VCode: ; stmg %r6, %r15, 48(%r15) -; aghi %r15, -192 +; aghi %r15, -208 ; block0: ; lgr %r6, %r2 ; vl %v1, 0(%r3) @@ -20,17 +20,17 @@ block0(v0: i128): ; la %r3, 160(%r15) ; la %r2, 176(%r15) ; bras %r1, 12 ; data %callee_be + 0 ; lg %r4, 0(%r1) -; basr %r14, %r4 -; vl %v20, 176(%r15) +; basr %r14, %r4 ; vl %v1, 176(%r15) ; vst %v1, 192(%r15) ; lgr %r2, %r6 -; vst %v20, 0(%r2) -; lmg %r6, %r15, 240(%r15) +; vl %v19, 192(%r15) +; vst %v19, 0(%r2) +; lmg %r6, %r15, 256(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 ; stmg %r6, %r15, 0x30(%r15) -; aghi %r15, -0xc0 +; aghi %r15, -0xd0 ; block1: ; offset 0xa ; lgr %r6, %r2 ; vl %v1, 0(%r3) @@ -44,10 +44,12 @@ block0(v0: i128): ; .byte 0x00, 0x00 ; lg %r4, 0(%r1) ; basr %r14, %r4 -; vl %v20, 0xb0(%r15) +; vl %v1, 0xb0(%r15) +; vst %v1, 0xc0(%r15) ; lgr %r2, %r6 -; vst %v20, 0(%r2) -; lmg %r6, %r15, 0xf0(%r15) +; vl %v19, 0xc0(%r15) +; vst %v19, 0(%r2) +; lmg %r6, %r15, 0x100(%r15) ; br %r14 function %caller_be_to_le(i128) -> i128 { @@ -60,15 +62,15 @@ block0(v0: i128): ; VCode: ; stmg %r6, %r15, 48(%r15) -; aghi %r15, -240 -; std %f8, 176(%r15) -; std %f9, 184(%r15) -; std %f10, 192(%r15) -; std %f11, 200(%r15) -; std %f12, 208(%r15) -; std %f13, 216(%r15) -; std %f14, 224(%r15) -; std %f15, 232(%r15) +; aghi %r15, -256 +; std %f8, 192(%r15) +; std %f9, 200(%r15) +; std %f10, 208(%r15) +; std %f11, 216(%r15) +; std %f12, 224(%r15) +; std %f13, 232(%r15) +; std %f14, 240(%r15) +; std %f15, 248(%r15) ; block0: ; lgr %r8, %r2 ; vl %v1, 0(%r3) @@ -77,33 +79,33 @@ block0(v0: i128): ; la %r3, 160(%r15) ; la %r2, 336(%r15) ; bras %r1, 12 ; data %callee_le + 0 ; lg %r4, 0(%r1) -; basr %r14, %r4 ; callee_pop_size 176 -; vl %v21, 160(%r15) +; basr %r14, %r4 ; callee_pop_size 176 ; vl %v1, 336(%r15) ; vst %v1, 352(%r15) ; lgr %r2, %r8 -; vst %v21, 0(%r2) -; ld %f8, 176(%r15) -; ld %f9, 184(%r15) -; ld %f10, 192(%r15) -; ld %f11, 200(%r15) -; ld %f12, 208(%r15) -; ld %f13, 216(%r15) -; ld %f14, 224(%r15) -; ld %f15, 232(%r15) -; lmg %r6, %r15, 288(%r15) +; vl %v20, 176(%r15) +; vst %v20, 0(%r2) +; ld %f8, 192(%r15) +; ld %f9, 200(%r15) +; ld %f10, 208(%r15) +; ld %f11, 216(%r15) +; ld %f12, 224(%r15) +; ld %f13, 232(%r15) +; ld %f14, 240(%r15) +; ld %f15, 248(%r15) +; lmg %r6, %r15, 304(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 ; stmg %r6, %r15, 0x30(%r15) -; aghi %r15, -0xf0 -; std %f8, 0xb0(%r15) -; std %f9, 0xb8(%r15) -; std %f10, 0xc0(%r15) -; std %f11, 0xc8(%r15) -; std %f12, 0xd0(%r15) -; std %f13, 0xd8(%r15) -; std %f14, 0xe0(%r15) -; std %f15, 0xe8(%r15) +; aghi %r15, -0x100 +; std %f8, 0xc0(%r15) +; std %f9, 0xc8(%r15) +; std %f10, 0xd0(%r15) +; std %f11, 0xd8(%r15) +; std %f12, 0xe0(%r15) +; std %f13, 0xe8(%r15) +; std %f14, 0xf0(%r15) +; std %f15, 0xf8(%r15) ; block1: ; offset 0x2a ; lgr %r8, %r2 ; vl %v1, 0(%r3) @@ -118,18 +120,20 @@ block0(v0: i128): ; .byte 0x00, 0x00 ; lg %r4, 0(%r1) ; basr %r14, %r4 -; vl %v21, 0xa0(%r15) +; vl %v1, 0xa0(%r15) +; vst %v1, 0xb0(%r15) ; lgr %r2, %r8 -; vst %v21, 0(%r2) -; ld %f8, 0xb0(%r15) -; ld %f9, 0xb8(%r15) -; ld %f10, 0xc0(%r15) -; ld %f11, 0xc8(%r15) -; ld %f12, 0xd0(%r15) -; ld %f13, 0xd8(%r15) -; ld %f14, 0xe0(%r15) -; ld %f15, 0xe8(%r15) -; lmg %r6, %r15, 0x120(%r15) +; vl %v20, 0xb0(%r15) +; vst %v20, 0(%r2) +; ld %f8, 0xc0(%r15) +; ld %f9, 0xc8(%r15) +; ld %f10, 0xd0(%r15) +; ld %f11, 0xd8(%r15) +; ld %f12, 0xe0(%r15) +; ld %f13, 0xe8(%r15) +; ld %f14, 0xf0(%r15) +; ld %f15, 0xf8(%r15) +; lmg %r6, %r15, 0x130(%r15) ; br %r14 function %caller_le_to_be(i128) -> i128 tail { @@ -142,15 +146,15 @@ block0(v0: i128): ; VCode: ; stmg %r14, %r15, 288(%r15) -; aghi %r15, -256 -; std %f8, 192(%r15) -; std %f9, 200(%r15) -; std %f10, 208(%r15) -; std %f11, 216(%r15) -; std %f12, 224(%r15) -; std %f13, 232(%r15) -; std %f14, 240(%r15) -; std %f15, 248(%r15) +; aghi %r15, -272 +; std %f8, 208(%r15) +; std %f9, 216(%r15) +; std %f10, 224(%r15) +; std %f11, 232(%r15) +; std %f12, 240(%r15) +; std %f13, 248(%r15) +; std %f14, 256(%r15) +; std %f15, 264(%r15) ; block0: ; lgr %r6, %r2 ; vl %v1, 0(%r3) @@ -158,34 +162,34 @@ block0(v0: i128): ; la %r3, 160(%r15) ; la %r2, 176(%r15) ; bras %r1, 12 ; data %callee_be + 0 ; lg %r5, 0(%r1) -; basr %r14, %r5 -; vl %v20, 176(%r15) +; basr %r14, %r5 ; vl %v1, 176(%r15) ; vst %v1, 192(%r15) ; lgr %r2, %r6 -; vst %v20, 0(%r2) -; ld %f8, 192(%r15) -; ld %f9, 200(%r15) -; ld %f10, 208(%r15) -; ld %f11, 216(%r15) -; ld %f12, 224(%r15) -; ld %f13, 232(%r15) -; ld %f14, 240(%r15) -; ld %f15, 248(%r15) -; aghi %r15, 432 +; vl %v19, 192(%r15) +; vst %v19, 0(%r2) +; ld %f8, 208(%r15) +; ld %f9, 216(%r15) +; ld %f10, 224(%r15) +; ld %f11, 232(%r15) +; ld %f12, 240(%r15) +; ld %f13, 248(%r15) +; ld %f14, 256(%r15) +; ld %f15, 264(%r15) +; aghi %r15, 448 ; lmg %r14, %r14, 112(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 ; stmg %r14, %r15, 0x120(%r15) -; aghi %r15, -0x100 -; std %f8, 0xc0(%r15) -; std %f9, 0xc8(%r15) -; std %f10, 0xd0(%r15) -; std %f11, 0xd8(%r15) -; std %f12, 0xe0(%r15) -; std %f13, 0xe8(%r15) -; std %f14, 0xf0(%r15) -; std %f15, 0xf8(%r15) +; aghi %r15, -0x110 +; std %f8, 0xd0(%r15) +; std %f9, 0xd8(%r15) +; std %f10, 0xe0(%r15) +; std %f11, 0xe8(%r15) +; std %f12, 0xf0(%r15) +; std %f13, 0xf8(%r15) +; std %f14, 0x100(%r15) +; std %f15, 0x108(%r15) ; block1: ; offset 0x2a ; lgr %r6, %r2 ; vl %v1, 0(%r3) @@ -199,18 +203,20 @@ block0(v0: i128): ; .byte 0x00, 0x00 ; lg %r5, 0(%r1) ; basr %r14, %r5 -; vl %v20, 0xb0(%r15) +; vl %v1, 0xb0(%r15) +; vst %v1, 0xc0(%r15) ; lgr %r2, %r6 -; vst %v20, 0(%r2) -; ld %f8, 0xc0(%r15) -; ld %f9, 0xc8(%r15) -; ld %f10, 0xd0(%r15) -; ld %f11, 0xd8(%r15) -; ld %f12, 0xe0(%r15) -; ld %f13, 0xe8(%r15) -; ld %f14, 0xf0(%r15) -; ld %f15, 0xf8(%r15) -; aghi %r15, 0x1b0 +; vl %v19, 0xc0(%r15) +; vst %v19, 0(%r2) +; ld %f8, 0xd0(%r15) +; ld %f9, 0xd8(%r15) +; ld %f10, 0xe0(%r15) +; ld %f11, 0xe8(%r15) +; ld %f12, 0xf0(%r15) +; ld %f13, 0xf8(%r15) +; ld %f14, 0x100(%r15) +; ld %f15, 0x108(%r15) +; aghi %r15, 0x1c0 ; lmg %r14, %r14, 0x70(%r15) ; br %r14 @@ -224,7 +230,7 @@ block0(v0: i128): ; VCode: ; stmg %r9, %r15, 248(%r15) -; aghi %r15, -176 +; aghi %r15, -192 ; block0: ; lgr %r9, %r2 ; vl %v1, 0(%r3) @@ -233,18 +239,18 @@ block0(v0: i128): ; la %r3, 160(%r15) ; la %r2, 336(%r15) ; bras %r1, 12 ; data %callee_le + 0 ; lg %r6, 0(%r1) -; basr %r14, %r6 ; callee_pop_size 176 -; vl %v21, 160(%r15) +; basr %r14, %r6 ; callee_pop_size 176 ; vl %v1, 336(%r15) ; vst %v1, 352(%r15) ; lgr %r2, %r9 -; vst %v21, 0(%r2) -; aghi %r15, 352 +; vl %v20, 176(%r15) +; vst %v20, 0(%r2) +; aghi %r15, 368 ; lmg %r9, %r14, 72(%r15) ; br %r14 ; ; Disassembled: ; block0: ; offset 0x0 ; stmg %r9, %r15, 0xf8(%r15) -; aghi %r15, -0xb0 +; aghi %r15, -0xc0 ; block1: ; offset 0xa ; lgr %r9, %r2 ; vl %v1, 0(%r3) @@ -259,10 +265,12 @@ block0(v0: i128): ; .byte 0x00, 0x00 ; lg %r6, 0(%r1) ; basr %r14, %r6 -; vl %v21, 0xa0(%r15) +; vl %v1, 0xa0(%r15) +; vst %v1, 0xb0(%r15) ; lgr %r2, %r9 -; vst %v21, 0(%r2) -; aghi %r15, 0x160 +; vl %v20, 0xb0(%r15) +; vst %v20, 0(%r2) +; aghi %r15, 0x170 ; lmg %r9, %r14, 0x48(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/vec-abi.clif b/cranelift/filetests/filetests/isa/s390x/vec-abi.clif index b86b32f0da..2927e49000 100644 --- a/cranelift/filetests/filetests/isa/s390x/vec-abi.clif +++ b/cranelift/filetests/filetests/isa/s390x/vec-abi.clif @@ -21,8 +21,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; vst %v19, 176(%r15) ; vst %v21, 192(%r15) ; vst %v23, 208(%r15) -; bras %r1, 12 ; data %callee_be + 0 ; lg %r4, 0(%r1) -; basr %r14, %r4 +; bras %r1, 12 ; data %callee_be + 0 ; lg %r3, 0(%r1) +; basr %r14, %r3 ; lmg %r14, %r15, 336(%r15) ; br %r14 ; @@ -44,8 +44,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r4, 0(%r1) -; basr %r14, %r4 +; lg %r3, 0(%r1) +; basr %r14, %r3 ; lmg %r14, %r15, 0x150(%r15) ; br %r14 @@ -108,10 +108,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; verllf %v5, %v3, 16 ; verllh %v7, %v5, 8 ; vst %v7, 208(%r15) -; bras %r1, 12 ; data %callee_le + 0 ; lg %r5, 0(%r1) -; basr %r14, %r5 ; callee_pop_size 224 -; vpdi %v22, %v24, %v24, 4 -; verllg %v24, %v22, 32 +; bras %r1, 12 ; data %callee_le + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 ; callee_pop_size 224 ; vpdi %v24, %v24, %v24, 4 ; verllg %v24, %v24, 32 ; ld %f8, 160(%r15) ; ld %f9, 168(%r15) ; ld %f10, 176(%r15) @@ -180,10 +178,10 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r5, 0(%r1) -; basr %r14, %r5 -; vpdi %v22, %v24, %v24, 4 -; verllg %v24, %v22, 0x20 +; lg %r4, 0(%r1) +; basr %r14, %r4 +; vpdi %v24, %v24, %v24, 4 +; verllg %v24, %v24, 0x20 ; ld %f8, 0xa0(%r15) ; ld %f9, 0xa8(%r15) ; ld %f10, 0xb0(%r15) @@ -253,10 +251,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; verllf %v4, %v2, 16 ; verllh %v6, %v4, 8 ; vst %v6, 208(%r15) -; bras %r1, 12 ; data %callee_be + 0 ; lg %r6, 0(%r1) -; basr %r14, %r6 -; vpdi %v21, %v24, %v24, 4 -; verllg %v24, %v21, 32 +; bras %r1, 12 ; data %callee_be + 0 ; lg %r5, 0(%r1) +; basr %r14, %r5 ; vpdi %v24, %v24, %v24, 4 ; verllg %v24, %v24, 32 ; ld %f8, 224(%r15) ; ld %f9, 232(%r15) ; ld %f10, 240(%r15) @@ -325,10 +321,10 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r6, 0(%r1) -; basr %r14, %r6 -; vpdi %v21, %v24, %v24, 4 -; verllg %v24, %v21, 0x20 +; lg %r5, 0(%r1) +; basr %r14, %r5 +; vpdi %v24, %v24, %v24, 4 +; verllg %v24, %v24, 0x20 ; ld %f8, 0xe0(%r15) ; ld %f9, 0xe8(%r15) ; ld %f10, 0xf0(%r15) @@ -362,8 +358,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; vst %v19, 176(%r15) ; vst %v21, 192(%r15) ; vst %v23, 208(%r15) -; bras %r1, 12 ; data %callee_le + 0 ; lg %r7, 0(%r1) -; basr %r14, %r7 ; callee_pop_size 224 +; bras %r1, 12 ; data %callee_le + 0 ; lg %r6, 0(%r1) +; basr %r14, %r6 ; callee_pop_size 224 ; aghi %r15, 384 ; lmg %r14, %r14, 112(%r15) ; br %r14 @@ -387,8 +383,8 @@ block0(v0: i64x2, v1: i32x4, v2: i16x8, v3: i8x16, v4: i64x2, v5: i32x4, v6: i16 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 -; lg %r7, 0(%r1) -; basr %r14, %r7 +; lg %r6, 0(%r1) +; basr %r14, %r6 ; aghi %r15, 0x180 ; lmg %r14, %r14, 0x70(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif new file mode 100644 index 0000000000..eecbda69d2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/call-with-retval-insts.clif @@ -0,0 +1,179 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +target x86_64 + +function %f(i32) -> i64 { + fn0 = %ext(i32) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 + +block0(v0: i32): + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20 = call fn0(v0) + + v21 = iadd v1, v2 + v22 = iadd v3, v4 + v23 = iadd v5, v6 + v24 = iadd v7, v8 + v25 = iadd v9, v10 + v26 = iadd v11, v12 + v27 = iadd v13, v14 + v28 = iadd v15, v16 + v29 = iadd v17, v18 + v30 = iadd v19, v20 + + v31 = iadd v21, v22 + v32 = iadd v23, v24 + v33 = iadd v25, v26 + v34 = iadd v27, v28 + v35 = iadd v29, v30 + + v36 = iadd v31, v32 + v37 = iadd v33, v34 + v38 = iadd v35, v36 + v39 = iadd v37, v38 + + return v39 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) +; block0: +; movq %rdi, %rsi +; lea 0(%rsp), %rdi +; load_ext_name %ext+0, %r10 +; call *%r10 +; lea 0(%rax,%rdx,1), %r8 +; lea 0(%rbx,%r15,1), %r9 +; lea 0(%r13,%r12,1), %r10 +; movq rsp(0 + virtual offset), %rcx +; lea 0(%rcx,%r14,1), %r11 +; movq rsp(8 + virtual offset), %rcx +; movq rsp(16 + virtual offset), %rdi +; lea 0(%rcx,%rdi,1), %rsi +; movq rsp(32 + virtual offset), %rdx +; movq rsp(24 + virtual offset), %rdi +; lea 0(%rdi,%rdx,1), %rdi +; movq rsp(40 + virtual offset), %rax +; movq rsp(48 + virtual offset), %rcx +; lea 0(%rax,%rcx,1), %rax +; movq rsp(64 + virtual offset), %rcx +; movq rsp(56 + virtual offset), %rdx +; lea 0(%rdx,%rcx,1), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(72 + virtual offset), %r14 +; lea 0(%r14,%rdx,1), %rdx +; movq rsp(96 + virtual offset), %rbx +; movq rsp(88 + virtual offset), %r13 +; lea 0(%r13,%rbx,1), %r14 +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%rdi,1), %r10 +; lea 0(%rax,%rcx,1), %r11 +; lea 0(%rdx,%r14,1), %rsi +; lea 0(%r8,%r9,1), %r8 +; lea 0(%r10,%r11,1), %r9 +; lea 0(%rsi,%r8,1), %r8 +; lea 0(%r9,%r8,1), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq %r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 +; movq %rdi, %rsi +; leaq (%rsp), %rdi +; movabsq $0, %r10 ; reloc_external Abs8 %ext 0 +; callq *%r10 +; movq (%rsp), %rbx +; movq 8(%rsp), %r15 +; movq 0x10(%rsp), %r13 +; movq 0x18(%rsp), %r12 +; movq 0x20(%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 0x28(%rsp), %r14 +; movq 0x30(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xa0(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xa8(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x60(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x68(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x70(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x78(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x80(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; leaq (%rax, %rdx), %r8 +; leaq (%rbx, %r15), %r9 +; leaq (%r13, %r12), %r10 +; movq 0x90(%rsp), %rcx +; leaq (%rcx, %r14), %r11 +; movq 0x98(%rsp), %rcx +; movq 0xa0(%rsp), %rdi +; leaq (%rcx, %rdi), %rsi +; movq 0xb0(%rsp), %rdx +; movq 0xa8(%rsp), %rdi +; addq %rdx, %rdi +; movq 0xb8(%rsp), %rax +; movq 0xc0(%rsp), %rcx +; addq %rcx, %rax +; movq 0xd0(%rsp), %rcx +; movq 0xc8(%rsp), %rdx +; addq %rdx, %rcx +; movq 0xe0(%rsp), %rdx +; movq 0xd8(%rsp), %r14 +; addq %r14, %rdx +; movq 0xf0(%rsp), %rbx +; movq 0xe8(%rsp), %r13 +; leaq (%r13, %rbx), %r14 +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; leaq (%rsi, %rdi), %r10 +; leaq (%rax, %rcx), %r11 +; leaq (%rdx, %r14), %rsi +; addq %r9, %r8 +; leaq (%r10, %r11), %r9 +; addq %rsi, %r8 +; leaq (%r9, %r8), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 1a03ba7aa5..6b16c75d3f 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1287,16 +1287,17 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $32, %rsp -; movq %r13, 16(%rsp) +; movq %r12, 16(%rsp) +; movq %r13, 24(%rsp) ; block0: ; movq %rdi, %r13 ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r9 ; call *%r9 -; movq 0(%rsp), %r8 ; movq %r13, %rdi -; movq %r8, 0(%rdi) -; movq 16(%rsp), %r13 +; movq %r12, 0(%rdi) +; movq 16(%rsp), %r12 +; movq 24(%rsp), %r13 ; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -1307,16 +1308,18 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; subq $0x20, %rsp -; movq %r13, 0x10(%rsp) -; block1: ; offset 0xd +; movq %r12, 0x10(%rsp) +; movq %r13, 0x18(%rsp) +; block1: ; offset 0x12 ; movq %rdi, %r13 ; leaq (%rsp), %rdi ; movabsq $0, %r9 ; reloc_external Abs8 %g 0 ; callq *%r9 -; movq (%rsp), %r8 +; movq (%rsp), %r12 ; movq %r13, %rdi -; movq %r8, (%rdi) -; movq 0x10(%rsp), %r13 +; movq %r12, (%rdi) +; movq 0x10(%rsp), %r12 +; movq 0x18(%rsp), %r13 ; addq $0x20, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif index 828be1ef89..44e26d71a9 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif @@ -208,24 +208,24 @@ block0: ; movq %r9, rsp(64 + virtual offset) ; movl $40, %r10d ; movq %r10, rsp(56 + virtual offset) -; movl $45, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $50, %r11d -; movq %r11, rsp(40 + virtual offset) +; movl $45, %r10d +; movq %r10, rsp(48 + virtual offset) +; movl $50, %r13d ; movl $55, %r14d ; movl $60, %r15d ; movl $65, %ebx ; movl $70, %r12d -; movl $75, %r13d +; movl $75, %r11d ; movl $80, %eax ; movl $85, %ecx ; movl $90, %edx ; movl $95, %esi -; movq %rsi, rsp(32 + virtual offset) +; movq %rsi, rsp(40 + virtual offset) ; movl $100, %r8d ; movl $105, %r9d ; movl $110, %r10d -; movl $115, %r11d +; movl $115, %esi +; movq %rsi, rsp(32 + virtual offset) ; movl $120, %esi ; movq %rsi, rsp(24 + virtual offset) ; movl $125, %esi @@ -234,28 +234,30 @@ block0: ; movq %rsi, rsp(8 + virtual offset) ; movl $135, %esi ; movq %rsi, rsp(0 + virtual offset) -; movq %r14, 0(%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 16(%rdi) -; movq %r12, 24(%rdi) -; movq %r13, 32(%rdi) -; movq %rax, 40(%rdi) -; movq %rcx, 48(%rdi) -; movq %rdx, 56(%rdi) -; movq rsp(32 + virtual offset), %rax -; movq %rax, 64(%rdi) -; movq %r8, 72(%rdi) -; movq %r9, 80(%rdi) -; movq %r10, 88(%rdi) -; movq %r11, 96(%rdi) -; movq rsp(24 + virtual offset), %rsi +; movq %r13, 0(%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 16(%rdi) +; movq %rbx, 24(%rdi) +; movq %r12, 32(%rdi) +; movq %r11, 40(%rdi) +; movq %rax, 48(%rdi) +; movq %rcx, 56(%rdi) +; movq %rdx, 64(%rdi) +; movq rsp(40 + virtual offset), %rax +; movq %rax, 72(%rdi) +; movq %r8, 80(%rdi) +; movq %r9, 88(%rdi) +; movq %r10, 96(%rdi) +; movq rsp(32 + virtual offset), %rsi ; movq %rsi, 104(%rdi) -; movq rsp(16 + virtual offset), %rsi +; movq rsp(24 + virtual offset), %rsi ; movq %rsi, 112(%rdi) -; movq rsp(8 + virtual offset), %rsi +; movq rsp(16 + virtual offset), %rsi ; movq %rsi, 120(%rdi) -; movq rsp(0 + virtual offset), %rsi +; movq rsp(8 + virtual offset), %rsi ; movq %rsi, 128(%rdi) +; movq rsp(0 + virtual offset), %rsi +; movq %rsi, 136(%rdi) ; movq rsp(104 + virtual offset), %rax ; movq rsp(96 + virtual offset), %rcx ; movq rsp(88 + virtual offset), %rdx @@ -264,7 +266,6 @@ block0: ; movq rsp(64 + virtual offset), %r8 ; movq rsp(56 + virtual offset), %r9 ; movq rsp(48 + virtual offset), %r10 -; movq rsp(40 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -300,24 +301,24 @@ block0: ; movq %r9, 0x40(%rsp) ; movl $0x28, %r10d ; movq %r10, 0x38(%rsp) -; movl $0x2d, %r11d -; movq %r11, 0x30(%rsp) -; movl $0x32, %r11d -; movq %r11, 0x28(%rsp) +; movl $0x2d, %r10d +; movq %r10, 0x30(%rsp) +; movl $0x32, %r13d ; movl $0x37, %r14d ; movl $0x3c, %r15d ; movl $0x41, %ebx ; movl $0x46, %r12d -; movl $0x4b, %r13d +; movl $0x4b, %r11d ; movl $0x50, %eax ; movl $0x55, %ecx ; movl $0x5a, %edx ; movl $0x5f, %esi -; movq %rsi, 0x20(%rsp) +; movq %rsi, 0x28(%rsp) ; movl $0x64, %r8d ; movl $0x69, %r9d ; movl $0x6e, %r10d -; movl $0x73, %r11d +; movl $0x73, %esi +; movq %rsi, 0x20(%rsp) ; movl $0x78, %esi ; movq %rsi, 0x18(%rsp) ; movl $0x7d, %esi @@ -326,28 +327,30 @@ block0: ; movq %rsi, 8(%rsp) ; movl $0x87, %esi ; movq %rsi, (%rsp) -; movq %r14, (%rdi) -; movq %r15, 8(%rdi) -; movq %rbx, 0x10(%rdi) -; movq %r12, 0x18(%rdi) -; movq %r13, 0x20(%rdi) -; movq %rax, 0x28(%rdi) -; movq %rcx, 0x30(%rdi) -; movq %rdx, 0x38(%rdi) -; movq 0x20(%rsp), %rax -; movq %rax, 0x40(%rdi) -; movq %r8, 0x48(%rdi) -; movq %r9, 0x50(%rdi) -; movq %r10, 0x58(%rdi) -; movq %r11, 0x60(%rdi) -; movq 0x18(%rsp), %rsi +; movq %r13, (%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 0x10(%rdi) +; movq %rbx, 0x18(%rdi) +; movq %r12, 0x20(%rdi) +; movq %r11, 0x28(%rdi) +; movq %rax, 0x30(%rdi) +; movq %rcx, 0x38(%rdi) +; movq %rdx, 0x40(%rdi) +; movq 0x28(%rsp), %rax +; movq %rax, 0x48(%rdi) +; movq %r8, 0x50(%rdi) +; movq %r9, 0x58(%rdi) +; movq %r10, 0x60(%rdi) +; movq 0x20(%rsp), %rsi ; movq %rsi, 0x68(%rdi) -; movq 0x10(%rsp), %rsi +; movq 0x18(%rsp), %rsi ; movq %rsi, 0x70(%rdi) -; movq 8(%rsp), %rsi +; movq 0x10(%rsp), %rsi ; movq %rsi, 0x78(%rdi) -; movq (%rsp), %rsi +; movq 8(%rsp), %rsi ; movq %rsi, 0x80(%rdi) +; movq (%rsp), %rsi +; movq %rsi, 0x88(%rdi) ; movq 0x68(%rsp), %rax ; movq 0x60(%rsp), %rcx ; movq 0x58(%rsp), %rdx @@ -356,7 +359,6 @@ block0: ; movq 0x40(%rsp), %r8 ; movq 0x38(%rsp), %r9 ; movq 0x30(%rsp), %r10 -; movq 0x28(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -378,28 +380,22 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $144, %rsp +; subq %rsp, $304, %rsp +; movq %rbx, 256(%rsp) +; movq %r12, 264(%rsp) +; movq %r13, 272(%rsp) +; movq %r14, 280(%rsp) +; movq %r15, 288(%rsp) ; block0: ; lea 0(%rsp), %rdi ; call TestCase(%tail_callee_stack_rets) -; movq 0(%rsp), %rax -; movq 8(%rsp), %rdx -; movq 16(%rsp), %r9 -; movq 24(%rsp), %r11 -; movq 32(%rsp), %rdi -; movq 40(%rsp), %rcx -; movq 48(%rsp), %r8 -; movq 56(%rsp), %r10 -; movq 64(%rsp), %rsi -; movq 72(%rsp), %rax -; movq 80(%rsp), %rdx -; movq 88(%rsp), %r9 -; movq 96(%rsp), %r11 -; movq 104(%rsp), %rdi -; movq 112(%rsp), %rcx -; movq 120(%rsp), %r8 -; movq 128(%rsp), %rax -; addq %rsp, $144, %rsp +; movq rsp(96 + virtual offset), %rax +; movq 256(%rsp), %rbx +; movq 264(%rsp), %r12 +; movq 272(%rsp), %r13 +; movq 280(%rsp), %r14 +; movq 288(%rsp), %r15 +; addq %rsp, $304, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -408,28 +404,53 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x90, %rsp -; block1: ; offset 0xb +; subq $0x130, %rsp +; movq %rbx, 0x100(%rsp) +; movq %r12, 0x108(%rsp) +; movq %r13, 0x110(%rsp) +; movq %r14, 0x118(%rsp) +; movq %r15, 0x120(%rsp) +; block1: ; offset 0x33 ; leaq (%rsp), %rdi -; callq 0x14 ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 -; movq (%rsp), %rax -; movq 8(%rsp), %rdx -; movq 0x10(%rsp), %r9 +; callq 0x3c ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 +; movq (%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 8(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x10(%rsp), %r11 +; movq %r11, 0xa0(%rsp) ; movq 0x18(%rsp), %r11 -; movq 0x20(%rsp), %rdi -; movq 0x28(%rsp), %rcx -; movq 0x30(%rsp), %r8 -; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %rsi -; movq 0x48(%rsp), %rax -; movq 0x50(%rsp), %rdx -; movq 0x58(%rsp), %r9 -; movq 0x60(%rsp), %r11 -; movq 0x68(%rsp), %rdi -; movq 0x70(%rsp), %rcx -; movq 0x78(%rsp), %r8 -; movq 0x80(%rsp), %rax -; addq $0x90, %rsp +; movq %r11, 0xa8(%rsp) +; movq 0x20(%rsp), %r11 +; movq %r11, 0xb0(%rsp) +; movq 0x28(%rsp), %r11 +; movq %r11, 0xb8(%rsp) +; movq 0x30(%rsp), %r11 +; movq %r11, 0xc0(%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0xc8(%rsp) +; movq 0x40(%rsp), %r11 +; movq %r11, 0xd0(%rsp) +; movq 0x48(%rsp), %r11 +; movq %r11, 0xd8(%rsp) +; movq 0x50(%rsp), %r11 +; movq %r11, 0xe0(%rsp) +; movq 0x58(%rsp), %r11 +; movq %r11, 0xe8(%rsp) +; movq 0x60(%rsp), %rbx +; movq 0x68(%rsp), %r12 +; movq 0x70(%rsp), %r13 +; movq 0x78(%rsp), %r14 +; movq 0x80(%rsp), %r15 +; movq 0x88(%rsp), %r11 +; movq %r11, 0xf0(%rsp) +; movq 0xf0(%rsp), %rax +; movq 0x100(%rsp), %rbx +; movq 0x108(%rsp), %r12 +; movq 0x110(%rsp), %r13 +; movq 0x118(%rsp), %r14 +; movq 0x120(%rsp), %r15 +; addq $0x130, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq @@ -461,52 +482,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, rsp(40 + virtual offset) ; movq rbp(stack args max - 168), %r10 ; movq %r10, rsp(48 + virtual offset) -; movq rbp(stack args max - 160), %r11 -; movq %r11, rsp(56 + virtual offset) -; movq rbp(stack args max - 152), %r11 -; movq %r11, rsp(64 + virtual offset) +; movq rbp(stack args max - 160), %r10 +; movq %r10, rsp(56 + virtual offset) +; movq rbp(stack args max - 152), %rcx +; movq %rcx, rsp(64 + virtual offset) ; movq rbp(stack args max - 144), %r8 +; movq %r8, rsp(72 + virtual offset) ; movq rbp(stack args max - 136), %r10 -; movq %r10, rsp(72 + virtual offset) +; movq %r10, rsp(80 + virtual offset) ; movq rbp(stack args max - 128), %rsi -; movq %rsi, rsp(80 + virtual offset) +; movq %rsi, rsp(88 + virtual offset) ; movq rbp(stack args max - 120), %rax -; movq %rax, rsp(88 + virtual offset) -; movq rbp(stack args max - 112), %r11 -; movq rbp(stack args max - 104), %r10 -; movq rbp(stack args max - 96), %r9 -; movq rbp(stack args max - 88), %rax ; movq %rax, rsp(96 + virtual offset) -; movq rbp(stack args max - 80), %rdx -; movq rbp(stack args max - 72), %rcx -; movq rbp(stack args max - 64), %rsi +; movq rbp(stack args max - 112), %r10 +; movq rbp(stack args max - 104), %r9 +; movq rbp(stack args max - 96), %r8 +; movq rbp(stack args max - 88), %rdx +; movq rbp(stack args max - 80), %rcx +; movq %rcx, rsp(104 + virtual offset) +; movq rbp(stack args max - 72), %rsi +; movq rbp(stack args max - 64), %r15 ; movq rbp(stack args max - 56), %r12 ; movq rbp(stack args max - 48), %r14 ; movq rbp(stack args max - 40), %rbx ; movq rbp(stack args max - 32), %r13 -; movq rbp(stack args max - 24), %r15 +; movq rbp(stack args max - 24), %r11 ; movq rbp(stack args max - 16), %rax -; movq %r8, 0(%rdi) -; movq rsp(72 + virtual offset), %r8 -; movq %r8, 8(%rdi) -; movq rsp(80 + virtual offset), %r8 -; movq %r8, 16(%rdi) -; movq rsp(88 + virtual offset), %r8 -; movq %r8, 24(%rdi) -; movq %r11, 32(%rdi) +; movq rsp(64 + virtual offset), %rcx +; movq %rcx, 0(%rdi) +; movq rsp(72 + virtual offset), %rcx +; movq %rcx, 8(%rdi) +; movq rsp(80 + virtual offset), %rcx +; movq %rcx, 16(%rdi) +; movq rsp(88 + virtual offset), %rcx +; movq %rcx, 24(%rdi) +; movq rsp(96 + virtual offset), %rcx +; movq %rcx, 32(%rdi) ; movq %r10, 40(%rdi) ; movq %r9, 48(%rdi) -; movq rsp(96 + virtual offset), %r8 ; movq %r8, 56(%rdi) ; movq %rdx, 64(%rdi) -; movq %rcx, 72(%rdi) +; movq rsp(104 + virtual offset), %rdx +; movq %rdx, 72(%rdi) ; movq %rsi, 80(%rdi) -; movq %r12, 88(%rdi) -; movq %r14, 96(%rdi) -; movq %rbx, 104(%rdi) -; movq %r13, 112(%rdi) -; movq %r15, 120(%rdi) -; movq %rax, 128(%rdi) +; movq %r15, 88(%rdi) +; movq %r12, 96(%rdi) +; movq %r14, 104(%rdi) +; movq %rbx, 112(%rdi) +; movq %r13, 120(%rdi) +; movq %r11, 128(%rdi) +; movq %rax, 136(%rdi) ; movq rsp(0 + virtual offset), %rax ; movq rsp(8 + virtual offset), %rcx ; movq rsp(16 + virtual offset), %rdx @@ -515,7 +540,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq rsp(40 + virtual offset), %r8 ; movq rsp(48 + virtual offset), %r9 ; movq rsp(56 + virtual offset), %r10 -; movq rsp(64 + virtual offset), %r11 ; movq 112(%rsp), %rbx ; movq 120(%rsp), %r12 ; movq 128(%rsp), %r13 @@ -546,52 +570,56 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq %r9, 0x28(%rsp) ; movq 0x18(%rbp), %r10 ; movq %r10, 0x30(%rsp) -; movq 0x20(%rbp), %r11 -; movq %r11, 0x38(%rsp) -; movq 0x28(%rbp), %r11 -; movq %r11, 0x40(%rsp) +; movq 0x20(%rbp), %r10 +; movq %r10, 0x38(%rsp) +; movq 0x28(%rbp), %rcx +; movq %rcx, 0x40(%rsp) ; movq 0x30(%rbp), %r8 +; movq %r8, 0x48(%rsp) ; movq 0x38(%rbp), %r10 -; movq %r10, 0x48(%rsp) +; movq %r10, 0x50(%rsp) ; movq 0x40(%rbp), %rsi -; movq %rsi, 0x50(%rsp) +; movq %rsi, 0x58(%rsp) ; movq 0x48(%rbp), %rax -; movq %rax, 0x58(%rsp) -; movq 0x50(%rbp), %r11 -; movq 0x58(%rbp), %r10 -; movq 0x60(%rbp), %r9 -; movq 0x68(%rbp), %rax ; movq %rax, 0x60(%rsp) -; movq 0x70(%rbp), %rdx -; movq 0x78(%rbp), %rcx -; movq 0x80(%rbp), %rsi +; movq 0x50(%rbp), %r10 +; movq 0x58(%rbp), %r9 +; movq 0x60(%rbp), %r8 +; movq 0x68(%rbp), %rdx +; movq 0x70(%rbp), %rcx +; movq %rcx, 0x68(%rsp) +; movq 0x78(%rbp), %rsi +; movq 0x80(%rbp), %r15 ; movq 0x88(%rbp), %r12 ; movq 0x90(%rbp), %r14 ; movq 0x98(%rbp), %rbx ; movq 0xa0(%rbp), %r13 -; movq 0xa8(%rbp), %r15 +; movq 0xa8(%rbp), %r11 ; movq 0xb0(%rbp), %rax -; movq %r8, (%rdi) -; movq 0x48(%rsp), %r8 -; movq %r8, 8(%rdi) -; movq 0x50(%rsp), %r8 -; movq %r8, 0x10(%rdi) -; movq 0x58(%rsp), %r8 -; movq %r8, 0x18(%rdi) -; movq %r11, 0x20(%rdi) +; movq 0x40(%rsp), %rcx +; movq %rcx, (%rdi) +; movq 0x48(%rsp), %rcx +; movq %rcx, 8(%rdi) +; movq 0x50(%rsp), %rcx +; movq %rcx, 0x10(%rdi) +; movq 0x58(%rsp), %rcx +; movq %rcx, 0x18(%rdi) +; movq 0x60(%rsp), %rcx +; movq %rcx, 0x20(%rdi) ; movq %r10, 0x28(%rdi) ; movq %r9, 0x30(%rdi) -; movq 0x60(%rsp), %r8 ; movq %r8, 0x38(%rdi) ; movq %rdx, 0x40(%rdi) -; movq %rcx, 0x48(%rdi) +; movq 0x68(%rsp), %rdx +; movq %rdx, 0x48(%rdi) ; movq %rsi, 0x50(%rdi) -; movq %r12, 0x58(%rdi) -; movq %r14, 0x60(%rdi) -; movq %rbx, 0x68(%rdi) -; movq %r13, 0x70(%rdi) -; movq %r15, 0x78(%rdi) -; movq %rax, 0x80(%rdi) +; movq %r15, 0x58(%rdi) +; movq %r12, 0x60(%rdi) +; movq %r14, 0x68(%rdi) +; movq %rbx, 0x70(%rdi) +; movq %r13, 0x78(%rdi) +; movq %r11, 0x80(%rdi) +; movq %rax, 0x88(%rdi) ; movq (%rsp), %rax ; movq 8(%rsp), %rcx ; movq 0x10(%rsp), %rdx @@ -600,7 +628,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; movq 0x28(%rsp), %r8 ; movq 0x30(%rsp), %r9 ; movq 0x38(%rsp), %r10 -; movq 0x40(%rsp), %r11 ; movq 0x70(%rsp), %rbx ; movq 0x78(%rsp), %r12 ; movq 0x80(%rsp), %r13 @@ -656,17 +683,17 @@ block0: ; movq %r15, 464(%rsp) ; block0: ; movl $10, %edx -; movq %rdx, rsp(96 + virtual offset) +; movq %rdx, rsp(88 + virtual offset) ; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movq %rcx, rsp(80 + virtual offset) ; movl $20, %r8d -; movq %r8, rsp(80 + virtual offset) +; movq %r8, rsp(72 + virtual offset) ; movl $25, %r9d -; movq %r9, rsp(72 + virtual offset) -; movl $30, %r9d ; movq %r9, rsp(64 + virtual offset) +; movl $30, %r9d +; movq %r9, rsp(56 + virtual offset) ; movl $35, %esi -; movq %rsi, rsp(56 + virtual offset) +; movq %rsi, rsp(48 + virtual offset) ; movl $40, %edi ; movl $45, %eax ; movl $50, %r10d @@ -681,20 +708,20 @@ block0: ; movl $95, %r8d ; movl $100, %r9d ; movl $105, %r11d -; movq %r11, rsp(48 + virtual offset) -; movl $110, %r11d ; movq %r11, rsp(40 + virtual offset) -; movl $115, %r11d +; movl $110, %r11d ; movq %r11, rsp(32 + virtual offset) -; movl $120, %r11d +; movl $115, %r11d ; movq %r11, rsp(24 + virtual offset) -; movl $125, %r11d +; movl $120, %r11d ; movq %r11, rsp(16 + virtual offset) -; movl $130, %r11d +; movl $125, %r11d ; movq %r11, rsp(8 + virtual offset) -; movl $135, %r11d +; movl $130, %r11d ; movq %r11, rsp(0 + virtual offset) -; movq rsp(56 + virtual offset), %r11 +; movl $135, %r11d +; movq %r11, rsp(96 + virtual offset) +; movq rsp(48 + virtual offset), %r11 ; movq %r11, 0(%rsp) ; movq %rdi, 8(%rsp) ; movq %rax, 16(%rsp) @@ -709,45 +736,29 @@ block0: ; movq %rcx, 88(%rsp) ; movq %r8, 96(%rsp) ; movq %r9, 104(%rsp) -; movq rsp(48 + virtual offset), %r11 -; movq %r11, 112(%rsp) ; movq rsp(40 + virtual offset), %r11 -; movq %r11, 120(%rsp) +; movq %r11, 112(%rsp) ; movq rsp(32 + virtual offset), %r11 -; movq %r11, 128(%rsp) +; movq %r11, 120(%rsp) ; movq rsp(24 + virtual offset), %r11 -; movq %r11, 136(%rsp) +; movq %r11, 128(%rsp) ; movq rsp(16 + virtual offset), %r11 -; movq %r11, 144(%rsp) +; movq %r11, 136(%rsp) ; movq rsp(8 + virtual offset), %r11 -; movq %r11, 152(%rsp) +; movq %r11, 144(%rsp) ; movq rsp(0 + virtual offset), %r11 +; movq %r11, 152(%rsp) +; movq rsp(96 + virtual offset), %r11 ; movq %r11, 160(%rsp) ; lea 176(%rsp), %rdi ; load_ext_name %tail_callee_stack_args_and_rets+0, %r10 -; movq rsp(80 + virtual offset), %rcx -; movq rsp(88 + virtual offset), %rdx -; movq rsp(96 + virtual offset), %rsi -; movq rsp(72 + virtual offset), %r8 -; movq rsp(64 + virtual offset), %r9 +; movq rsp(72 + virtual offset), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(88 + virtual offset), %rsi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 ; call *%r10 -; movq 176(%rsp), %r8 -; movq 184(%rsp), %r10 -; movq 192(%rsp), %rsi -; movq 200(%rsp), %rax -; movq 208(%rsp), %rdx -; movq 216(%rsp), %r9 -; movq 224(%rsp), %r11 -; movq 232(%rsp), %rdi -; movq 240(%rsp), %rcx -; movq 248(%rsp), %r8 -; movq 256(%rsp), %r10 -; movq 264(%rsp), %rsi -; movq 272(%rsp), %rax -; movq 280(%rsp), %rdx -; movq 288(%rsp), %r9 -; movq 296(%rsp), %r11 -; movq 304(%rsp), %rax +; movq rsp(96 + virtual offset), %rax ; movq 432(%rsp), %rbx ; movq 440(%rsp), %r12 ; movq 448(%rsp), %r13 @@ -770,17 +781,17 @@ block0: ; movq %r15, 0x1d0(%rsp) ; block1: ; offset 0x33 ; movl $0xa, %edx -; movq %rdx, 0x1a0(%rsp) +; movq %rdx, 0x198(%rsp) ; movl $0xf, %ecx -; movq %rcx, 0x198(%rsp) +; movq %rcx, 0x190(%rsp) ; movl $0x14, %r8d -; movq %r8, 0x190(%rsp) +; movq %r8, 0x188(%rsp) ; movl $0x19, %r9d -; movq %r9, 0x188(%rsp) -; movl $0x1e, %r9d ; movq %r9, 0x180(%rsp) +; movl $0x1e, %r9d +; movq %r9, 0x178(%rsp) ; movl $0x23, %esi -; movq %rsi, 0x178(%rsp) +; movq %rsi, 0x170(%rsp) ; movl $0x28, %edi ; movl $0x2d, %eax ; movl $0x32, %r10d @@ -795,20 +806,20 @@ block0: ; movl $0x5f, %r8d ; movl $0x64, %r9d ; movl $0x69, %r11d -; movq %r11, 0x170(%rsp) -; movl $0x6e, %r11d ; movq %r11, 0x168(%rsp) -; movl $0x73, %r11d +; movl $0x6e, %r11d ; movq %r11, 0x160(%rsp) -; movl $0x78, %r11d +; movl $0x73, %r11d ; movq %r11, 0x158(%rsp) -; movl $0x7d, %r11d +; movl $0x78, %r11d ; movq %r11, 0x150(%rsp) -; movl $0x82, %r11d +; movl $0x7d, %r11d ; movq %r11, 0x148(%rsp) -; movl $0x87, %r11d +; movl $0x82, %r11d ; movq %r11, 0x140(%rsp) -; movq 0x178(%rsp), %r11 +; movl $0x87, %r11d +; movq %r11, 0x1a0(%rsp) +; movq 0x170(%rsp), %r11 ; movq %r11, (%rsp) ; movq %rdi, 8(%rsp) ; movq %rax, 0x10(%rsp) @@ -823,46 +834,61 @@ block0: ; movq %rcx, 0x58(%rsp) ; movq %r8, 0x60(%rsp) ; movq %r9, 0x68(%rsp) -; movq 0x170(%rsp), %r11 -; movq %r11, 0x70(%rsp) ; movq 0x168(%rsp), %r11 -; movq %r11, 0x78(%rsp) +; movq %r11, 0x70(%rsp) ; movq 0x160(%rsp), %r11 -; movq %r11, 0x80(%rsp) +; movq %r11, 0x78(%rsp) ; movq 0x158(%rsp), %r11 -; movq %r11, 0x88(%rsp) +; movq %r11, 0x80(%rsp) ; movq 0x150(%rsp), %r11 -; movq %r11, 0x90(%rsp) +; movq %r11, 0x88(%rsp) ; movq 0x148(%rsp), %r11 -; movq %r11, 0x98(%rsp) +; movq %r11, 0x90(%rsp) ; movq 0x140(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; movq 0x1a0(%rsp), %r11 ; movq %r11, 0xa0(%rsp) ; leaq 0xb0(%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 -; movq 0x190(%rsp), %rcx -; movq 0x198(%rsp), %rdx -; movq 0x1a0(%rsp), %rsi -; movq 0x188(%rsp), %r8 -; movq 0x180(%rsp), %r9 +; movq 0x188(%rsp), %rcx +; movq 0x190(%rsp), %rdx +; movq 0x198(%rsp), %rsi +; movq 0x180(%rsp), %r8 +; movq 0x178(%rsp), %r9 ; callq *%r10 ; subq $0xb0, %rsp -; movq 0xb0(%rsp), %r8 -; movq 0xb8(%rsp), %r10 -; movq 0xc0(%rsp), %rsi -; movq 0xc8(%rsp), %rax -; movq 0xd0(%rsp), %rdx -; movq 0xd8(%rsp), %r9 +; movq 0xb0(%rsp), %r11 +; movq %r11, 0x140(%rsp) +; movq 0xb8(%rsp), %r11 +; movq %r11, 0x148(%rsp) +; movq 0xc0(%rsp), %r11 +; movq %r11, 0x150(%rsp) +; movq 0xc8(%rsp), %r11 +; movq %r11, 0x158(%rsp) +; movq 0xd0(%rsp), %r11 +; movq %r11, 0x160(%rsp) +; movq 0xd8(%rsp), %r11 +; movq %r11, 0x168(%rsp) ; movq 0xe0(%rsp), %r11 -; movq 0xe8(%rsp), %rdi -; movq 0xf0(%rsp), %rcx -; movq 0xf8(%rsp), %r8 -; movq 0x100(%rsp), %r10 -; movq 0x108(%rsp), %rsi -; movq 0x110(%rsp), %rax -; movq 0x118(%rsp), %rdx -; movq 0x120(%rsp), %r9 -; movq 0x128(%rsp), %r11 -; movq 0x130(%rsp), %rax +; movq %r11, 0x170(%rsp) +; movq 0xe8(%rsp), %r11 +; movq %r11, 0x178(%rsp) +; movq 0xf0(%rsp), %r11 +; movq %r11, 0x180(%rsp) +; movq 0xf8(%rsp), %r11 +; movq %r11, 0x188(%rsp) +; movq 0x100(%rsp), %r11 +; movq %r11, 0x190(%rsp) +; movq 0x108(%rsp), %r11 +; movq %r11, 0x198(%rsp) +; movq 0x110(%rsp), %rbx +; movq 0x118(%rsp), %r12 +; movq 0x120(%rsp), %r13 +; movq 0x128(%rsp), %r14 +; movq 0x130(%rsp), %r15 +; movq 0x138(%rsp), %r11 +; movq %r11, 0x1a0(%rsp) +; movq 0x1a0(%rsp), %rax ; movq 0x1b0(%rsp), %rbx ; movq 0x1b8(%rsp), %r12 ; movq 0x1c0(%rsp), %r13 diff --git a/cranelift/filetests/filetests/isa/x64/winch.clif b/cranelift/filetests/filetests/isa/x64/winch.clif index 2bda2fd16b..4b227d0ac7 100644 --- a/cranelift/filetests/filetests/isa/x64/winch.clif +++ b/cranelift/filetests/filetests/isa/x64/winch.clif @@ -291,25 +291,25 @@ block0(v0:i64): ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $64, %rsp -; movq %rbx, 16(%rsp) -; movq %r12, 24(%rsp) -; movq %r13, 32(%rsp) -; movq %r14, 40(%rsp) -; movq %r15, 48(%rsp) +; subq %rsp, $80, %rsp +; movq %rbx, 32(%rsp) +; movq %r12, 40(%rsp) +; movq %r13, 48(%rsp) +; movq %r14, 56(%rsp) +; movq %r15, 64(%rsp) ; block0: ; lea 0(%rsp), %rdi ; load_ext_name %g+0, %r10 ; call *%r10 -; movq 4(%rsp), %rax -; movq 0(%rsp), %r9 -; andl %r9d, %eax -; movq 16(%rsp), %rbx -; movq 24(%rsp), %r12 -; movq 32(%rsp), %r13 -; movq 40(%rsp), %r14 -; movq 48(%rsp), %r15 -; addq %rsp, $64, %rsp +; movq rsp(0 + virtual offset), %rax +; movq rsp(8 + virtual offset), %rdx +; andl %edx, %eax +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r12 +; movq 48(%rsp), %r13 +; movq 56(%rsp), %r14 +; movq 64(%rsp), %r15 +; addq %rsp, $80, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -318,25 +318,29 @@ block0(v0:i64): ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x40, %rsp -; movq %rbx, 0x10(%rsp) -; movq %r12, 0x18(%rsp) -; movq %r13, 0x20(%rsp) -; movq %r14, 0x28(%rsp) -; movq %r15, 0x30(%rsp) +; subq $0x50, %rsp +; movq %rbx, 0x20(%rsp) +; movq %r12, 0x28(%rsp) +; movq %r13, 0x30(%rsp) +; movq %r14, 0x38(%rsp) +; movq %r15, 0x40(%rsp) ; block1: ; offset 0x21 ; leaq (%rsp), %rdi ; movabsq $0, %r10 ; reloc_external Abs8 %g 0 ; callq *%r10 -; movq 4(%rsp), %rax -; movq (%rsp), %r9 -; andl %r9d, %eax -; movq 0x10(%rsp), %rbx -; movq 0x18(%rsp), %r12 -; movq 0x20(%rsp), %r13 -; movq 0x28(%rsp), %r14 -; movq 0x30(%rsp), %r15 -; addq $0x40, %rsp +; movq 4(%rsp), %r11 +; movq %r11, 0x10(%rsp) +; movq (%rsp), %r11 +; movq %r11, 0x18(%rsp) +; movq 0x10(%rsp), %rax +; movq 0x18(%rsp), %rdx +; andl %edx, %eax +; movq 0x20(%rsp), %rbx +; movq 0x28(%rsp), %r12 +; movq 0x30(%rsp), %r13 +; movq 0x38(%rsp), %r14 +; movq 0x40(%rsp), %r15 +; addq $0x50, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/frontend/Cargo.toml b/cranelift/frontend/Cargo.toml index 00e6829377..561e7ce40e 100644 --- a/cranelift/frontend/Cargo.toml +++ b/cranelift/frontend/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-frontend" -version = "0.119.0" +version = "0.120.0" description = "Cranelift IR builder helper" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-frontend" diff --git a/cranelift/interpreter/Cargo.toml b/cranelift/interpreter/Cargo.toml index 216db8f817..7d17036d36 100644 --- a/cranelift/interpreter/Cargo.toml +++ b/cranelift/interpreter/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-interpreter" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "Interpret Cranelift IR" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/isle/isle/Cargo.toml b/cranelift/isle/isle/Cargo.toml index bed834d65c..94642ebfd3 100644 --- a/cranelift/isle/isle/Cargo.toml +++ b/cranelift/isle/isle/Cargo.toml @@ -7,7 +7,7 @@ license = "Apache-2.0 WITH LLVM-exception" name = "cranelift-isle" readme = "../README.md" repository = "https://github.com/bytecodealliance/wasmtime/tree/main/cranelift/isle" -version = "0.119.0" +version = "0.120.0" [lints] workspace = true diff --git a/cranelift/jit/Cargo.toml b/cranelift/jit/Cargo.toml index 4538c5122d..b4260eed3b 100644 --- a/cranelift/jit/Cargo.toml +++ b/cranelift/jit/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-jit" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "A JIT library backed by Cranelift" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/module/Cargo.toml b/cranelift/module/Cargo.toml index 5dbaf62251..e95c9efb41 100644 --- a/cranelift/module/Cargo.toml +++ b/cranelift/module/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-module" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "Support for linking functions and data with Cranelift" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml index 40c57329f5..b264592d3f 100644 --- a/cranelift/native/Cargo.toml +++ b/cranelift/native/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-native" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "Support for targeting the host with Cranelift" documentation = "https://docs.rs/cranelift-native" diff --git a/cranelift/object/Cargo.toml b/cranelift/object/Cargo.toml index 805b9aba22..0e396cfb52 100644 --- a/cranelift/object/Cargo.toml +++ b/cranelift/object/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-object" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "Emit Cranelift output to native object files with `object`" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/reader/Cargo.toml b/cranelift/reader/Cargo.toml index 0a1db25894..76766d4782 100644 --- a/cranelift/reader/Cargo.toml +++ b/cranelift/reader/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-reader" -version = "0.119.0" +version = "0.120.0" description = "Cranelift textual IR reader" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-reader" diff --git a/cranelift/serde/Cargo.toml b/cranelift/serde/Cargo.toml index 54e469ee4a..e2d7132c50 100644 --- a/cranelift/serde/Cargo.toml +++ b/cranelift/serde/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-serde" -version = "0.119.0" +version = "0.120.0" authors = ["The Cranelift Project Developers"] description = "Serializer/Deserializer for Cranelift IR" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/srcgen/Cargo.toml b/cranelift/srcgen/Cargo.toml index f82089e732..74b747de35 100644 --- a/cranelift/srcgen/Cargo.toml +++ b/cranelift/srcgen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-srcgen" -version = "0.119.0" +version = "0.120.0" authors = ["The Wasmtime Project Developers"] description = "Helper functions for generating Rust and ISLE files" license = "Apache-2.0 WITH LLVM-exception" diff --git a/cranelift/umbrella/Cargo.toml b/cranelift/umbrella/Cargo.toml index fee70be4a4..53f41c6016 100644 --- a/cranelift/umbrella/Cargo.toml +++ b/cranelift/umbrella/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift" -version = "0.119.0" +version = "0.120.0" description = "Umbrella for commonly-used cranelift crates" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift" diff --git a/crates/c-api/include/wasmtime.h b/crates/c-api/include/wasmtime.h index 2a7dccb370..79af55ef97 100644 --- a/crates/c-api/include/wasmtime.h +++ b/crates/c-api/include/wasmtime.h @@ -206,11 +206,11 @@ /** * \brief Wasmtime version string. */ -#define WASMTIME_VERSION "32.0.0" +#define WASMTIME_VERSION "33.0.0" /** * \brief Wasmtime major version number. */ -#define WASMTIME_VERSION_MAJOR 32 +#define WASMTIME_VERSION_MAJOR 33 /** * \brief Wasmtime minor version number. */ diff --git a/crates/cranelift/src/translate/code_translator/bounds_checks.rs b/crates/cranelift/src/bounds_checks.rs similarity index 97% rename from crates/cranelift/src/translate/code_translator/bounds_checks.rs rename to crates/cranelift/src/bounds_checks.rs index 5bb55f687f..ccb943bdb0 100644 --- a/crates/cranelift/src/translate/code_translator/bounds_checks.rs +++ b/crates/cranelift/src/bounds_checks.rs @@ -28,7 +28,7 @@ use cranelift_codegen::{ ir::{Expr, Fact}, }; use cranelift_frontend::FunctionBuilder; -use wasmtime_environ::{Unsigned, WasmResult}; +use wasmtime_environ::Unsigned; use Reachability::*; /// Helper used to emit bounds checks (as necessary) and compute the native @@ -46,7 +46,7 @@ pub fn bounds_check_and_compute_addr( offset: u32, // Static size of the heap access. access_size: u8, -) -> WasmResult> { +) -> Reachability { let pointer_bit_width = u16::try_from(env.pointer_type().bits()).unwrap(); let bound_gv = heap.bound; let orig_index = index; @@ -165,9 +165,9 @@ pub fn bounds_check_and_compute_addr( // Special case: trap immediately if `offset + access_size > // max_memory_size`, since we will end up being out-of-bounds regardless // of the given `index`. - env.before_unconditionally_trapping_memory_access(builder)?; + env.before_unconditionally_trapping_memory_access(builder); env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); - return Ok(Unreachable); + return Unreachable; } // Special case: if this is a 32-bit platform and the `offset_and_size` @@ -175,9 +175,9 @@ pub fn bounds_check_and_compute_addr( // being in-bounds. We can't represent `offset_and_size` in CLIF as the // native pointer type anyway, so this is an unconditional trap. if pointer_bit_width < 64 && offset_and_size >= (1 << pointer_bit_width) { - env.before_unconditionally_trapping_memory_access(builder)?; + env.before_unconditionally_trapping_memory_access(builder); env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); - return Ok(Unreachable); + return Unreachable; } // Special case for when we can completely omit explicit @@ -226,27 +226,27 @@ pub fn bounds_check_and_compute_addr( can_use_virtual_memory, "static memories require the ability to use virtual memory" ); - return Ok(Reachable(compute_addr( + return Reachable(compute_addr( &mut builder.cursor(), heap, env.pointer_type(), index, offset, AddrPcc::static32(heap.pcc_memory_type, memory_reservation + memory_guard_size), - ))); + )); } // Special case when the `index` is a constant and statically known to be // in-bounds on this memory, no bounds checks necessary. if statically_in_bounds { - return Ok(Reachable(compute_addr( + return Reachable(compute_addr( &mut builder.cursor(), heap, env.pointer_type(), index, offset, AddrPcc::static32(heap.pcc_memory_type, memory_reservation + memory_guard_size), - ))); + )); } // Special case for when we can rely on virtual memory, the minimum @@ -287,7 +287,7 @@ pub fn bounds_check_and_compute_addr( adjusted_bound_value, Some(0), ); - return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + return Reachable(explicit_check_oob_condition_and_compute_addr( env, builder, heap, @@ -297,7 +297,7 @@ pub fn bounds_check_and_compute_addr( oob_behavior, AddrPcc::static32(heap.pcc_memory_type, memory_reservation), oob, - ))); + )); } // Special case for when `offset + access_size == 1`: @@ -320,7 +320,7 @@ pub fn bounds_check_and_compute_addr( bound, Some(0), ); - return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + return Reachable(explicit_check_oob_condition_and_compute_addr( env, builder, heap, @@ -330,7 +330,7 @@ pub fn bounds_check_and_compute_addr( oob_behavior, AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), oob, - ))); + )); } // Special case for when we know that there are enough guard @@ -368,7 +368,7 @@ pub fn bounds_check_and_compute_addr( bound, Some(0), ); - return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + return Reachable(explicit_check_oob_condition_and_compute_addr( env, builder, heap, @@ -378,7 +378,7 @@ pub fn bounds_check_and_compute_addr( oob_behavior, AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), oob, - ))); + )); } // Special case for when `offset + access_size <= min_size`. @@ -412,7 +412,7 @@ pub fn bounds_check_and_compute_addr( adjusted_bound, Some(adjustment), ); - return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + return Reachable(explicit_check_oob_condition_and_compute_addr( env, builder, heap, @@ -422,7 +422,7 @@ pub fn bounds_check_and_compute_addr( oob_behavior, AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), oob, - ))); + )); } // General case for dynamic bounds checks: @@ -461,7 +461,7 @@ pub fn bounds_check_and_compute_addr( bound, Some(0), ); - Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + Reachable(explicit_check_oob_condition_and_compute_addr( env, builder, heap, @@ -471,7 +471,7 @@ pub fn bounds_check_and_compute_addr( oob_behavior, AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), oob, - ))) + )) } /// Get the bound of a dynamic heap as an `ir::Value`. diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 0ee29a26a7..b339b7bf08 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -14,7 +14,6 @@ use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{self, types}; use cranelift_codegen::ir::{ArgumentPurpose, Function, InstBuilder, MemFlags}; use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa}; -use cranelift_entity::packed_option::ReservedValue as _; use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use cranelift_frontend::FunctionBuilder; use cranelift_frontend::Variable; @@ -118,6 +117,9 @@ pub struct FuncEnvironment<'module_environment> { /// The Cranelift global holding the vmctx address. vmctx: Option, + /// The Cranelift global for our vmctx's `*mut VMStoreContext`. + vm_store_context: Option, + /// The PCC memory type describing the vmctx layout, if we're /// using PCC. pcc_vmctx_memtype: Option, @@ -136,12 +138,6 @@ pub struct FuncEnvironment<'module_environment> { /// in `*const VMStoreContext` fuel_var: cranelift_frontend::Variable, - /// A function-local variable which caches the value of `*const - /// VMStoreContext` for this function's vmctx argument. This pointer is stored - /// in the vmctx itself, but never changes for the lifetime of the function, - /// so if we load it up front we can continue to use it throughout. - vmstore_context_ptr: ir::Value, - /// A cached epoch deadline value, when performing epoch-based /// interruption. Loaded from `VMStoreContext` and reloaded after /// any yield. @@ -196,6 +192,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { heaps: PrimaryMap::default(), tables: SecondaryMap::default(), vmctx: None, + vm_store_context: None, pcc_vmctx_memtype: None, builtin_functions, offsets: VMOffsets::new(compiler.isa().pointer_bytes(), &translation.module), @@ -203,7 +200,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> { fuel_var: Variable::new(0), epoch_deadline_var: Variable::new(0), epoch_ptr_var: Variable::new(0), - vmstore_context_ptr: ir::Value::reserved_value(), // Start with at least one fuel being consumed because even empty // functions should consume at least some fuel. @@ -308,22 +304,29 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } } - fn declare_vmstore_context_ptr(&mut self, builder: &mut FunctionBuilder<'_>) { - // We load the `*const VMStoreContext` value stored within vmctx at the - // head of the function and reuse the same value across the entire - // function. This is possible since we know that the pointer never - // changes for the lifetime of the function. - let pointer_type = self.pointer_type(); - let vmctx = self.vmctx(builder.func); - let base = builder.ins().global_value(pointer_type, vmctx); - let offset = i32::from(self.offsets.ptr.vmctx_store_context()); - debug_assert!(self.vmstore_context_ptr.is_reserved_value()); - self.vmstore_context_ptr = builder.ins().load( - pointer_type, - ir::MemFlags::trusted().with_readonly().with_can_move(), + /// Get or create the `ir::Global` for the `*mut VMStoreContext` in our + /// `VMContext`. + fn get_vmstore_context_ptr_global(&mut self, func: &mut ir::Function) -> ir::GlobalValue { + if let Some(ptr) = self.vm_store_context { + return ptr; + } + + let offset = self.offsets.ptr.vmctx_store_context(); + let base = self.vmctx(func); + let ptr = func.create_global_value(ir::GlobalValueData::Load { base, - offset, - ); + offset: Offset32::new(offset.into()), + global_type: self.pointer_type(), + flags: ir::MemFlags::trusted().with_readonly().with_can_move(), + }); + self.vm_store_context = Some(ptr); + ptr + } + + /// Get the `*mut VMStoreContext` value for our `VMContext`. + fn get_vmstore_context_ptr(&mut self, builder: &mut FunctionBuilder) -> ir::Value { + let global = self.get_vmstore_context_ptr_global(&mut builder.func); + builder.ins().global_value(self.pointer_type(), global) } fn fuel_function_entry(&mut self, builder: &mut FunctionBuilder<'_>) { @@ -471,7 +474,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { /// Loads the fuel consumption value from `VMStoreContext` into `self.fuel_var` fn fuel_load_into_var(&mut self, builder: &mut FunctionBuilder<'_>) { - let (addr, offset) = self.fuel_addr_offset(); + let (addr, offset) = self.fuel_addr_offset(builder); let fuel = builder .ins() .load(ir::types::I64, ir::MemFlags::trusted(), addr, offset); @@ -481,7 +484,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { /// Stores the fuel consumption value from `self.fuel_var` into /// `VMStoreContext`. fn fuel_save_from_var(&mut self, builder: &mut FunctionBuilder<'_>) { - let (addr, offset) = self.fuel_addr_offset(); + let (addr, offset) = self.fuel_addr_offset(builder); let fuel_consumed = builder.use_var(self.fuel_var); builder .ins() @@ -490,10 +493,13 @@ impl<'module_environment> FuncEnvironment<'module_environment> { /// Returns the `(address, offset)` of the fuel consumption within /// `VMStoreContext`, used to perform loads/stores later. - fn fuel_addr_offset(&mut self) -> (ir::Value, ir::immediates::Offset32) { - debug_assert!(!self.vmstore_context_ptr.is_reserved_value()); + fn fuel_addr_offset( + &mut self, + builder: &mut FunctionBuilder<'_>, + ) -> (ir::Value, ir::immediates::Offset32) { + let vmstore_ctx = self.get_vmstore_context_ptr(builder); ( - self.vmstore_context_ptr, + vmstore_ctx, i32::from(self.offsets.ptr.vmstore_context_fuel_consumed()).into(), ) } @@ -678,10 +684,11 @@ impl<'module_environment> FuncEnvironment<'module_environment> { // We keep the deadline cached in a register to speed the checks // in the common case (between epoch ticks) but we want to do a // precise check here by reloading the cache first. + let vmstore_ctx = self.get_vmstore_context_ptr(builder); let deadline = builder.ins().load( ir::types::I64, ir::MemFlags::trusted(), - self.vmstore_context_ptr, + vmstore_ctx, ir::immediates::Offset32::new(self.offsets.ptr.vmstore_context_epoch_deadline() as i32), ); builder.def_var(self.epoch_deadline_var, deadline); @@ -1009,37 +1016,49 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } } - /// Add one level of indirection to a pointer-and-memtype pair: - /// generate a load in the code at the specified offset, and if - /// memtypes are in use, add a field to the original struct and - /// generate a new memtype for the pointee. - fn load_pointer_with_memtypes( + /// Create an `ir::Global` that does `load(ptr + offset)` and, when PCC and + /// memory types are enabled, adds a field to the pointer's memory type for + /// this value we are loading. + pub(crate) fn global_load_with_memory_type( &mut self, func: &mut ir::Function, + ptr: ir::GlobalValue, offset: u32, - readonly: bool, - memtype: Option, + flags: ir::MemFlags, + ptr_mem_ty: Option, ) -> (ir::GlobalValue, Option) { - let vmctx = self.vmctx(func); let pointee = func.create_global_value(ir::GlobalValueData::Load { - base: vmctx, + base: ptr, offset: Offset32::new(i32::try_from(offset).unwrap()), global_type: self.pointer_type(), - flags: MemFlags::trusted().with_readonly().with_can_move(), + flags, }); - let mt = memtype.map(|mt| { - let pointee_mt = self.create_empty_struct_memtype(func); - self.add_field_to_memtype(func, mt, offset, pointee_mt, readonly); + let pointee_mem_ty = ptr_mem_ty.map(|ptr_mem_ty| { + let pointee_mem_ty = self.create_empty_struct_memtype(func); + self.add_field_to_memtype(func, ptr_mem_ty, offset, pointee_mem_ty, flags.readonly()); func.global_value_facts[pointee] = Some(Fact::Mem { - ty: pointee_mt, + ty: pointee_mem_ty, min_offset: 0, max_offset: 0, nullable: false, }); - pointee_mt + pointee_mem_ty }); - (pointee, mt) + + (pointee, pointee_mem_ty) + } + + /// Like `global_load_with_memory_type` but specialized for loads out of the + /// `vmctx`. + pub(crate) fn global_load_from_vmctx_with_memory_type( + &mut self, + func: &mut ir::Function, + offset: u32, + flags: ir::MemFlags, + ) -> (ir::GlobalValue, Option) { + let vmctx = self.vmctx(func); + self.global_load_with_memory_type(func, vmctx, offset, flags, self.pcc_vmctx_memtype) } /// Helper to emit a conditional trap based on `trap_cond`. @@ -2323,7 +2342,7 @@ impl FuncEnvironment<'_> { let memory = self.module.memories[index]; let is_shared = memory.shared; - let (ptr, base_offset, current_length_offset, ptr_memtype) = { + let (base_ptr, base_offset, current_length_offset, ptr_memtype) = { let vmctx = self.vmctx(func); if let Some(def_index) = self.module.defined_memory_index(index) { if is_shared { @@ -2332,11 +2351,10 @@ impl FuncEnvironment<'_> { // VMMemoryDefinition` to it and dereference that when // atomically growing it. let from_offset = self.offsets.vmctx_vmmemory_pointer(def_index); - let (memory, def_mt) = self.load_pointer_with_memtypes( + let (memory, def_mt) = self.global_load_from_vmctx_with_memory_type( func, from_offset, - true, - self.pcc_vmctx_memtype, + ir::MemFlags::trusted().with_readonly().with_can_move(), ); let base_offset = i32::from(self.offsets.ptr.vmmemory_definition_base()); let current_length_offset = @@ -2360,11 +2378,10 @@ impl FuncEnvironment<'_> { } } else { let from_offset = self.offsets.vmctx_vmmemory_import_from(index); - let (memory, def_mt) = self.load_pointer_with_memtypes( + let (memory, def_mt) = self.global_load_from_vmctx_with_memory_type( func, from_offset, - true, - self.pcc_vmctx_memtype, + ir::MemFlags::trusted().with_readonly().with_can_move(), ); let base_offset = i32::from(self.offsets.ptr.vmmemory_definition_base()); let current_length_offset = @@ -2373,13 +2390,66 @@ impl FuncEnvironment<'_> { } }; - let heap_bound = func.create_global_value(ir::GlobalValueData::Load { - base: ptr, + let bound = func.create_global_value(ir::GlobalValueData::Load { + base: base_ptr, offset: Offset32::new(current_length_offset), global_type: pointer_type, flags: MemFlags::trusted(), }); + let (base_fact, pcc_memory_type) = self.make_pcc_base_fact_and_type_for_memory( + func, + memory, + base_offset, + current_length_offset, + ptr_memtype, + bound, + ); + + let base = self.make_heap_base(func, memory, base_ptr, base_offset, base_fact); + + Ok(self.heaps.push(HeapData { + base, + bound, + pcc_memory_type, + memory, + })) + } + + pub(crate) fn make_heap_base( + &self, + func: &mut Function, + memory: Memory, + ptr: ir::GlobalValue, + offset: i32, + fact: Option, + ) -> ir::GlobalValue { + let pointer_type = self.pointer_type(); + + let mut flags = ir::MemFlags::trusted().with_checked().with_can_move(); + if !memory.memory_may_move(self.tunables) { + flags.set_readonly(); + } + + let heap_base = func.create_global_value(ir::GlobalValueData::Load { + base: ptr, + offset: Offset32::new(offset), + global_type: pointer_type, + flags, + }); + func.global_value_facts[heap_base] = fact; + heap_base + } + + pub(crate) fn make_pcc_base_fact_and_type_for_memory( + &mut self, + func: &mut Function, + memory: Memory, + base_offset: i32, + current_length_offset: i32, + ptr_memtype: Option, + heap_bound: ir::GlobalValue, + ) -> (Option, Option) { // If we have a declared maximum, we can make this a "static" heap, which is // allocated up front and never moved. let host_page_size_log2 = self.target_config().page_size_align_log2; @@ -2486,25 +2556,7 @@ impl FuncEnvironment<'_> { (None, None) } }; - - let mut flags = MemFlags::trusted().with_checked().with_can_move(); - if !memory.memory_may_move(self.tunables) { - flags.set_readonly(); - } - let heap_base = func.create_global_value(ir::GlobalValueData::Load { - base: ptr, - offset: Offset32::new(base_offset), - global_type: pointer_type, - flags, - }); - func.global_value_facts[heap_base] = base_fact; - - Ok(self.heaps.push(HeapData { - base: heap_base, - bound: heap_bound, - pcc_memory_type: memory_type, - memory, - })) + (base_fact, memory_type) } pub fn make_global( @@ -3073,15 +3125,11 @@ impl FuncEnvironment<'_> { Ok(()) } - pub fn before_unconditionally_trapping_memory_access( - &mut self, - builder: &mut FunctionBuilder, - ) -> WasmResult<()> { + pub fn before_unconditionally_trapping_memory_access(&mut self, builder: &mut FunctionBuilder) { if self.tunables.consume_fuel { self.fuel_increment_var(builder); self.fuel_save_from_var(builder); } - Ok(()) } pub fn before_translate_function( @@ -3098,15 +3146,11 @@ impl FuncEnvironment<'_> { self.conditionally_trap(builder, overflow, ir::TrapCode::STACK_OVERFLOW); } - // If the `vmstore_context_ptr` variable will get used then we - // initialize it here. - if self.tunables.consume_fuel || self.tunables.epoch_interruption { - self.declare_vmstore_context_ptr(builder); - } // Additionally we initialize `fuel_var` if it will get used. if self.tunables.consume_fuel { self.fuel_function_entry(builder); } + // Initialize `epoch_var` with the current epoch. if self.tunables.epoch_interruption { self.epoch_function_entry(builder); diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index 076bf6a202..5cb5bdfa68 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -30,6 +30,7 @@ pub use obj::*; mod compiled_function; pub use compiled_function::*; +mod bounds_checks; mod builder; mod compiler; mod debug; @@ -431,3 +432,19 @@ impl BuiltinFunctionSignatures { /// Must be kept in sync with /// `crate::runtime::vm::gc::VMGcRef::I31_REF_DISCRIMINANT`. const I31_REF_DISCRIMINANT: u32 = 1; + +/// Like `Option` but specifically for passing information about transitions +/// from reachable to unreachable state and the like from callees to callers. +/// +/// Marked `must_use` to force callers to update +/// `FuncTranslationState::reachable` as necessary. +#[derive(PartialEq, Eq)] +#[must_use] +enum Reachability { + /// The Wasm execution state is reachable, here is a `T`. + Reachable(T), + /// The Wasm execution state has been determined to be statically + /// unreachable. It is the receiver of this value's responsibility to update + /// `FuncTranslationState::reachable` as necessary. + Unreachable, +} diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 8bb55974ec..a6e6c3310b 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -71,14 +71,14 @@ //! //! ("Relax verification to allow I8X16 to act as a default vector type") -mod bounds_checks; - +use crate::bounds_checks::bounds_check_and_compute_addr; use crate::func_environ::{Extension, FuncEnvironment}; use crate::translate::environ::{GlobalVariable, StructFieldsVec}; use crate::translate::state::{ControlStackFrame, ElseData, FuncTranslationState}; use crate::translate::translation_utils::{ block_with_params, blocktype_params_results, f32_translation, f64_translation, }; +use crate::Reachability; use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; use cranelift_codegen::ir::immediates::Offset32; use cranelift_codegen::ir::types::*; @@ -3202,14 +3202,9 @@ fn prepare_addr( let addr = match u32::try_from(memarg.offset) { // If our offset fits within a u32, then we can place the it into the // offset immediate of the `heap_addr` instruction. - Ok(offset) => bounds_checks::bounds_check_and_compute_addr( - builder, - environ, - &heap, - index, - offset, - access_size, - )?, + Ok(offset) => { + bounds_check_and_compute_addr(builder, environ, &heap, index, offset, access_size) + } // If the offset doesn't fit within a u32, then we can't pass it // directly into `heap_addr`. @@ -3247,14 +3242,7 @@ fn prepare_addr( offset, ir::TrapCode::HEAP_OUT_OF_BOUNDS, ); - bounds_checks::bounds_check_and_compute_addr( - builder, - environ, - &heap, - adjusted_index, - 0, - access_size, - )? + bounds_check_and_compute_addr(builder, environ, &heap, adjusted_index, 0, access_size) } }; let addr = match addr { @@ -3331,22 +3319,6 @@ fn prepare_atomic_addr( prepare_addr(memarg, loaded_bytes, builder, state, environ) } -/// Like `Option` but specifically for passing information about transitions -/// from reachable to unreachable state and the like from callees to callers. -/// -/// Marked `must_use` to force callers to update -/// `FuncTranslationState::reachable` as necessary. -#[derive(PartialEq, Eq)] -#[must_use] -pub enum Reachability { - /// The Wasm execution state is reachable, here is a `T`. - Reachable(T), - /// The Wasm execution state has been determined to be statically - /// unreachable. It is the receiver of this value's responsibility to update - /// `FuncTranslationState::reachable` as necessary. - Unreachable, -} - /// Translate a load instruction. /// /// Returns the execution state's reachability after the load is translated. diff --git a/crates/math/src/lib.rs b/crates/math/src/lib.rs index 008cedfa7d..cb1ffa4340 100644 --- a/crates/math/src/lib.rs +++ b/crates/math/src/lib.rs @@ -37,13 +37,13 @@ pub trait WasmFloat { impl WasmFloat for f32 { #[inline] fn wasm_trunc(self) -> f32 { + if self.is_nan() { + return f32::NAN; + } #[cfg(feature = "std")] if !cfg!(windows) && !cfg!(target_arch = "riscv64") { return self.trunc(); } - if self.is_nan() { - return f32::NAN; - } libm::truncf(self) } #[inline] @@ -56,24 +56,24 @@ impl WasmFloat for f32 { } #[inline] fn wasm_floor(self) -> f32 { + if self.is_nan() { + return f32::NAN; + } #[cfg(feature = "std")] if !cfg!(target_arch = "riscv64") { return self.floor(); } - if self.is_nan() { - return f32::NAN; - } libm::floorf(self) } #[inline] fn wasm_ceil(self) -> f32 { + if self.is_nan() { + return f32::NAN; + } #[cfg(feature = "std")] if !cfg!(target_arch = "riscv64") { return self.ceil(); } - if self.is_nan() { - return f32::NAN; - } libm::ceilf(self) } #[inline] @@ -94,13 +94,13 @@ impl WasmFloat for f32 { } #[inline] fn wasm_nearest(self) -> f32 { + if self.is_nan() { + return f32::NAN; + } #[cfg(feature = "std")] if !cfg!(windows) && !cfg!(target_arch = "riscv64") { return self.round_ties_even(); } - if self.is_nan() { - return f32::NAN; - } let round = libm::roundf(self); if libm::fabsf(self - round) != 0.5 { return round; @@ -162,13 +162,13 @@ impl WasmFloat for f32 { impl WasmFloat for f64 { #[inline] fn wasm_trunc(self) -> f64 { + if self.is_nan() { + return f64::NAN; + } #[cfg(feature = "std")] if !cfg!(windows) && !cfg!(target_arch = "riscv64") { return self.trunc(); } - if self.is_nan() { - return f64::NAN; - } libm::trunc(self) } #[inline] @@ -181,24 +181,24 @@ impl WasmFloat for f64 { } #[inline] fn wasm_floor(self) -> f64 { + if self.is_nan() { + return f64::NAN; + } #[cfg(feature = "std")] if !cfg!(target_arch = "riscv64") { return self.floor(); } - if self.is_nan() { - return f64::NAN; - } libm::floor(self) } #[inline] fn wasm_ceil(self) -> f64 { + if self.is_nan() { + return f64::NAN; + } #[cfg(feature = "std")] if !cfg!(target_arch = "riscv64") { return self.ceil(); } - if self.is_nan() { - return f64::NAN; - } libm::ceil(self) } #[inline] @@ -219,13 +219,13 @@ impl WasmFloat for f64 { } #[inline] fn wasm_nearest(self) -> f64 { + if self.is_nan() { + return f64::NAN; + } #[cfg(feature = "std")] if !cfg!(windows) && !cfg!(target_arch = "riscv64") { return self.round_ties_even(); } - if self.is_nan() { - return f64::NAN; - } let round = libm::round(self); if libm::fabs(self - round) != 0.5 { return round; diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs index 8794ec360d..60ed07d62c 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs @@ -1,11 +1,26 @@ use crate::prelude::*; use alloc::collections::BTreeMap; -use core::cmp; use core::{alloc::Layout, num::NonZeroU32, ops::Bound}; /// A very simple first-fit free list for use by our garbage collectors. pub(crate) struct FreeList { /// The total capacity of the contiguous range of memory we are managing. + /// + /// NB: we keep `self.capacity` unrounded because otherwise we would get + /// rounding errors where we lose track of the actual capacity we have when + /// repeatedly adding capacity `n` where `n < ALIGN`: + /// + /// ```ignore + /// let mut free_list = FreeList::new(0); + /// loop { + /// free_list.add_capacity(1); + /// } + /// ``` + /// + /// If we eagerly rounded capacity down to our alignment on every call to + /// `add_capacity`, the free list would always think it has zero capacity, + /// even though it would have enough capacity for many allocations after + /// enough iterations of the loop. capacity: usize, /// Our free blocks, as a map from index to length of the free block at that /// index. @@ -28,7 +43,7 @@ impl FreeList { /// Create a new `FreeList` for a contiguous region of memory of the given /// size. pub fn new(capacity: usize) -> Self { - log::trace!("FreeList::new({capacity})"); + log::debug!("FreeList::new({capacity})"); let mut free_list = FreeList { capacity, free_block_index_to_len: BTreeMap::new(), @@ -37,8 +52,66 @@ impl FreeList { free_list } + /// Add additional capacity to this free list. + #[allow(dead_code)] // TODO: becomes used in https://github.com/bytecodealliance/wasmtime/pull/10503 + pub fn add_capacity(&mut self, additional: usize) { + let old_cap = self.capacity; + self.capacity = self.capacity.saturating_add(additional); + log::debug!( + "FreeList::add_capacity({additional:#x}): capacity growing from {old_cap:#x} to {:#x}", + self.capacity + ); + + // See the comment on `self.capacity` about why we need to do the + // alignment-rounding here, rather than keeping `self.capacity` aligned + // at rest. + let old_cap_rounded = round_usize_down_to_pow2(old_cap, ALIGN_USIZE); + + // If we are adding capacity beyond what a `u32` can address, then we + // can't actually use that capacity, so don't bother adding a new block + // to the free list. + let Ok(old_cap_rounded) = u32::try_from(old_cap_rounded) else { + return; + }; + + // Our new block's index is the end of the old capacity. + let index = NonZeroU32::new(old_cap_rounded).unwrap_or( + // But additionally all indices must be non-zero, so start the new + // block at the first aligned index if necessary. + NonZeroU32::new(ALIGN_U32).unwrap(), + ); + + // If, after rounding everything to our alignment, we aren't actually + // gaining any new capacity, then don't add a new block to the free + // list. + let new_cap = u32::try_from(self.capacity).unwrap_or(u32::MAX); + let new_cap = round_u32_down_to_pow2(new_cap, ALIGN_U32); + debug_assert!(new_cap >= index.get()); + let size = new_cap - index.get(); + debug_assert_eq!(size % ALIGN_U32, 0); + if size == 0 { + return; + } + + // If we can't represent this block in a `Layout`, then don't add it to + // our free list either. + let Ok(layout) = Layout::from_size_align(usize::try_from(size).unwrap(), ALIGN_USIZE) + else { + return; + }; + + // Okay! Add a block to our free list for the new capacity, potentially + // merging it with existing blocks at the end of the free list. + log::trace!( + "FreeList::add_capacity(..): adding block {index:#x}..{:#x}", + index.get() + size + ); + self.dealloc(index, layout); + } + + #[cfg(test)] fn max_size(&self) -> usize { - let cap = cmp::min(self.capacity, usize::try_from(u32::MAX).unwrap()); + let cap = core::cmp::min(self.capacity, usize::try_from(u32::MAX).unwrap()); round_usize_down_to_pow2(cap.saturating_sub(ALIGN_USIZE), ALIGN_USIZE) } @@ -47,21 +120,11 @@ impl FreeList { fn check_layout(&self, layout: Layout) -> Result { ensure!( layout.align() <= ALIGN_USIZE, - "requested allocation's alignment of {} is greater than max supported alignment of {ALIGN_USIZE}", + "requested allocation's alignment of {} is greater than max supported \ + alignment of {ALIGN_USIZE}", layout.align(), ); - if layout.size() > self.max_size() { - let trap = crate::Trap::AllocationTooLarge; - let err = anyhow::Error::from(trap); - let err = err.context(format!( - "requested allocation's size of {} is greater than the max supported size of {}", - layout.size(), - self.max_size(), - )); - return Err(err); - } - let alloc_size = u32::try_from(layout.size()).map_err(|e| { let trap = crate::Trap::AllocationTooLarge; let err = anyhow::Error::from(trap); @@ -377,6 +440,8 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn check_no_fragmentation((capacity, ops) in ops()) { + let _ = env_logger::try_init(); + // Map from allocation id to ptr. let mut live = HashMap::new(); @@ -519,23 +584,14 @@ mod tests { fn allocate_no_split() { // Create a free list with the capacity to allocate two blocks of size // `ALIGN_U32`. - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 2); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 2); assert_eq!(free_list.free_block_index_to_len.len(), 1); - assert_eq!( - free_list.max_size(), - usize::try_from(ALIGN_U32).unwrap() * 2 - ); + assert_eq!(free_list.max_size(), ALIGN_USIZE * 2); // Allocate a block such that the remainder is not worth splitting. free_list - .alloc( - Layout::from_size_align( - usize::try_from(ALIGN_U32).unwrap() + ALIGN_USIZE, - ALIGN_USIZE, - ) - .unwrap(), - ) + .alloc(Layout::from_size_align(ALIGN_USIZE + ALIGN_USIZE, ALIGN_USIZE).unwrap()) .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); @@ -547,23 +603,14 @@ mod tests { fn allocate_and_split() { // Create a free list with the capacity to allocate three blocks of size // `ALIGN_U32`. - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 3); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 3); assert_eq!(free_list.free_block_index_to_len.len(), 1); - assert_eq!( - free_list.max_size(), - usize::try_from(ALIGN_U32).unwrap() * 3 - ); + assert_eq!(free_list.max_size(), ALIGN_USIZE * 3); // Allocate a block such that the remainder is not worth splitting. free_list - .alloc( - Layout::from_size_align( - usize::try_from(ALIGN_U32).unwrap() + ALIGN_USIZE, - ALIGN_USIZE, - ) - .unwrap(), - ) + .alloc(Layout::from_size_align(ALIGN_USIZE + ALIGN_USIZE, ALIGN_USIZE).unwrap()) .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); @@ -573,10 +620,9 @@ mod tests { #[test] fn dealloc_merge_prev_and_next() { - let layout = - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap(), ALIGN_USIZE).unwrap(); + let layout = Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE).unwrap(); - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 100); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( free_list.free_block_index_to_len.len(), 1, @@ -621,10 +667,9 @@ mod tests { #[test] fn dealloc_merge_with_prev_and_not_next() { - let layout = - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap(), ALIGN_USIZE).unwrap(); + let layout = Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE).unwrap(); - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 100); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( free_list.free_block_index_to_len.len(), 1, @@ -669,10 +714,9 @@ mod tests { #[test] fn dealloc_merge_with_next_and_not_prev() { - let layout = - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap(), ALIGN_USIZE).unwrap(); + let layout = Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE).unwrap(); - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 100); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( free_list.free_block_index_to_len.len(), 1, @@ -717,10 +761,9 @@ mod tests { #[test] fn dealloc_no_merge() { - let layout = - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap(), ALIGN_USIZE).unwrap(); + let layout = Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE).unwrap(); - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 100); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( free_list.free_block_index_to_len.len(), 1, @@ -770,38 +813,27 @@ mod tests { #[test] fn alloc_size_too_large() { // Free list with room for 10 min-sized blocks. - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 10); - assert_eq!( - free_list.max_size(), - usize::try_from(ALIGN_U32).unwrap() * 10 - ); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 10); + assert_eq!(free_list.max_size(), ALIGN_USIZE * 10); // Attempt to allocate something that is 20 times the size of our // min-sized block. assert!(free_list - .alloc( - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap() * 20, ALIGN_USIZE) - .unwrap(), - ) - .is_err()); + .alloc(Layout::from_size_align(ALIGN_USIZE * 20, ALIGN_USIZE).unwrap()) + .unwrap() + .is_none()); } #[test] fn alloc_align_too_large() { // Free list with room for 10 min-sized blocks. - let mut free_list = FreeList::new(ALIGN_USIZE + usize::try_from(ALIGN_U32).unwrap() * 10); - assert_eq!( - free_list.max_size(), - usize::try_from(ALIGN_U32).unwrap() * 10 - ); + let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 10); + assert_eq!(free_list.max_size(), ALIGN_USIZE * 10); // Attempt to allocate something that requires larger alignment than // `FreeList` supports. assert!(free_list - .alloc( - Layout::from_size_align(usize::try_from(ALIGN_U32).unwrap(), ALIGN_USIZE * 2) - .unwrap(), - ) + .alloc(Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE * 2).unwrap(),) .is_err()); } @@ -834,4 +866,56 @@ mod tests { test(&mut f, l); } } + + #[test] + fn add_capacity() { + let layout = Layout::from_size_align(ALIGN_USIZE, ALIGN_USIZE).unwrap(); + + let mut free_list = FreeList::new(0); + assert!(free_list.alloc(layout).unwrap().is_none(), "no capacity"); + + free_list.add_capacity(ALIGN_USIZE); + assert!( + free_list.alloc(layout).unwrap().is_none(), + "still not enough capacity because we won't allocate the zero index" + ); + + free_list.add_capacity(1); + assert!( + free_list.alloc(layout).unwrap().is_none(), + "still not enough capacity because allocations are multiples of the alignment" + ); + + free_list.add_capacity(ALIGN_USIZE - 1); + let a = free_list + .alloc(layout) + .unwrap() + .expect("now we have enough capacity for one"); + assert!( + free_list.alloc(layout).unwrap().is_none(), + "but not enough capacity for two" + ); + + free_list.add_capacity(ALIGN_USIZE); + let b = free_list + .alloc(layout) + .unwrap() + .expect("now we have enough capacity for two"); + + free_list.dealloc(a, layout); + free_list.dealloc(b, layout); + assert_eq!( + free_list.free_block_index_to_len.len(), + 1, + "`dealloc` should merge blocks from different `add_capacity` calls together" + ); + + free_list.add_capacity(ALIGN_USIZE); + assert_eq!( + free_list.free_block_index_to_len.len(), + 1, + "`add_capacity` should eagerly merge new capacity into the last block \ + in the free list, when possible" + ); + } } diff --git a/src/commands/objdump.rs b/src/commands/objdump.rs index f936d25657..c905dacaca 100644 --- a/src/commands/objdump.rs +++ b/src/commands/objdump.rs @@ -154,7 +154,7 @@ impl ObjdumpCommand { Func::Wasm } else if name.contains("trampoline") { Func::Trampoline - } else if name.contains("libcall") { + } else if name.contains("libcall") || name.starts_with("component") { Func::Libcall } else { panic!("unknown symbol: {name}") diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index 9e4c25438f..4826137617 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -6,400 +6,400 @@ version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-assembler-x64]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-assembler-x64]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-assembler-x64-meta]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-assembler-x64-meta]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-bforest]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-bforest]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-bitset]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-bitset]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-codegen]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-codegen]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-codegen-meta]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-codegen-meta]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-codegen-shared]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-codegen-shared]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-control]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-control]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-entity]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-entity]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-frontend]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-frontend]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-interpreter]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-interpreter]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-isle]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-isle]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-jit]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-jit]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-module]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-module]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-native]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-native]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-object]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-object]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-reader]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-reader]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.cranelift-serde]] version = "0.119.0" audited_as = "0.117.2" [[unpublished.cranelift-serde]] -version = "0.119.0" -audited_as = "0.117.2" +version = "0.120.0" +audited_as = "0.118.0" [[unpublished.pulley-interpreter]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.pulley-interpreter]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasi-common]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasi-common]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-asm-macros]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-asm-macros]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-cache]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-cache]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-cli]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-cli]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-cli-flags]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-cli-flags]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-component-macro]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-component-macro]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-component-util]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-component-util]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-cranelift]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-cranelift]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-environ]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-environ]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-explorer]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-explorer]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-fiber]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-fiber]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-jit-debug]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-jit-debug]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-jit-icache-coherence]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-jit-icache-coherence]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-math]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-math]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-slab]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-slab]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-config]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-config]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-http]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-http]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-io]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-io]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-keyvalue]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-keyvalue]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-nn]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-nn]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wasi-threads]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wasi-threads]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wast]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wast]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-winch]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-winch]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wit-bindgen]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wit-bindgen]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wasmtime-wmemcheck]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wasmtime-wmemcheck]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wiggle]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wiggle]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wiggle-generate]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wiggle-generate]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wiggle-macro]] version = "32.0.0" audited_as = "30.0.2" [[unpublished.wiggle-macro]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[unpublished.wiggle-test]] version = "0.0.0" @@ -410,8 +410,8 @@ version = "32.0.0" audited_as = "30.0.2" [[unpublished.winch-codegen]] -version = "32.0.0" -audited_as = "30.0.2" +version = "33.0.0" +audited_as = "31.0.0" [[publisher.aho-corasick]] version = "1.0.2" @@ -603,116 +603,116 @@ user-login = "jrmuizel" user-name = "Jeff Muizelaar" [[publisher.cranelift]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-assembler-x64]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-assembler-x64-meta]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-bforest]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-bitset]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-codegen]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-codegen-meta]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-codegen-shared]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-control]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-entity]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-frontend]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-interpreter]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-isle]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-jit]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-module]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-native]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-object]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-reader]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.cranelift-serde]] -version = "0.117.2" -when = "2025-02-25" +version = "0.118.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" @@ -920,8 +920,8 @@ user-login = "dtolnay" user-name = "David Tolnay" [[publisher.pulley-interpreter]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" @@ -933,8 +933,8 @@ user-login = "dtolnay" user-name = "David Tolnay" [[publisher.regalloc2]] -version = "0.11.2" -when = "2025-04-01" +version = "0.11.3" +when = "2025-04-07" user-id = 3726 user-login = "cfallin" user-name = "Chris Fallin" @@ -1191,8 +1191,8 @@ user-login = "sunfishcode" user-name = "Dan Gohman" [[publisher.wasi-common]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" @@ -1280,158 +1280,158 @@ user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-asm-macros]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-cache]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-cli]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-cli-flags]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-component-macro]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-component-util]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-cranelift]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-environ]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-explorer]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-fiber]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-jit-debug]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-jit-icache-coherence]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-math]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-slab]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-config]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-http]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-io]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-keyvalue]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-nn]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wasi-threads]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wast]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-winch]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wit-bindgen]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wasmtime-wmemcheck]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" @@ -1455,20 +1455,20 @@ user-login = "alexcrichton" user-name = "Alex Crichton" [[publisher.wiggle]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wiggle-generate]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" [[publisher.wiggle-macro]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" @@ -1487,8 +1487,8 @@ user-login = "BurntSushi" user-name = "Andrew Gallant" [[publisher.winch-codegen]] -version = "30.0.2" -when = "2025-02-25" +version = "31.0.0" +when = "2025-03-20" user-id = 73222 user-login = "wasmtime-publish" diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index dee3e534bb..78184feddf 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -857,7 +857,10 @@ fn run_precompiled_component() -> Result<()> { Ok(()) } +// Disable test on s390x because the large allocation may actually succeed; +// the whole 64-bit address space is available on this platform. #[test] +#[cfg(not(target_arch = "s390x"))] fn memory_growth_failure() -> Result<()> { let output = get_wasmtime_command()? .args(&[ diff --git a/tests/all/memory.rs b/tests/all/memory.rs index f8110018a6..9b93711b71 100644 --- a/tests/all/memory.rs +++ b/tests/all/memory.rs @@ -343,7 +343,10 @@ unsafe fn assert_faults(ptr: *mut u8) { } } +// Disable test on s390x because the large allocation may actually succeed; +// the whole 64-bit address space is available on this platform. #[test] +#[cfg(not(target_arch = "s390x"))] fn massive_64_bit_still_limited() -> Result<()> { // Creating a 64-bit memory which exceeds the limits of the address space // should still send a request to the `ResourceLimiter` to ensure that it diff --git a/tests/disas.rs b/tests/disas.rs index ca77a5ceae..625025c83e 100644 --- a/tests/disas.rs +++ b/tests/disas.rs @@ -112,6 +112,7 @@ struct TestConfig { test: TestKind, flags: Option, objdump: Option, + filter: Option, } #[derive(Debug, Deserialize)] @@ -195,10 +196,16 @@ impl Test { } } let engine = Engine::new(&config).context("failed to create engine")?; - let module = wat::parse_file(&self.path)?; - let elf = engine - .precompile_module(&module) - .context("failed to compile module")?; + let wasm = wat::parse_file(&self.path)?; + let elf = if wasmparser::Parser::is_component(&wasm) { + engine + .precompile_component(&wasm) + .context("failed to compile component")? + } else { + engine + .precompile_module(&wasm) + .context("failed to compile module")? + }; match self.config.test { TestKind::Clif | TestKind::Optimize => { @@ -213,7 +220,8 @@ impl Test { let entry = entry.context("failed to iterate over tempdir")?; let path = entry.path(); if let Some(name) = path.file_name().and_then(|s| s.to_str()) { - if !name.starts_with("wasm_func_") { + let filter = self.config.filter.as_deref().unwrap_or("wasm_func_"); + if !name.contains(filter) { continue; } } @@ -276,6 +284,9 @@ fn assert_output(test: &Test, output: CompileOutput) -> Result<()> { cmd.arg("--traps=false"); } } + if let Some(filter) = &test.config.filter { + cmd.arg("--filter").arg(filter); + } let mut child = cmd.spawn().context("failed to run wasmtime")?; child diff --git a/tests/disas/epoch-interruption.wat b/tests/disas/epoch-interruption.wat index ddee8fa91b..4820634af5 100644 --- a/tests/disas/epoch-interruption.wat +++ b/tests/disas/epoch-interruption.wat @@ -9,33 +9,34 @@ ;; gv1 = load.i64 notrap aligned readonly gv0+8 ;; gv2 = load.i64 notrap aligned gv1+16 ;; gv3 = vmctx +;; gv4 = load.i64 notrap aligned readonly can_move gv3+8 ;; sig0 = (i64 vmctx) -> i64 tail ;; fn0 = colocated u1:16 sig0 ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64): -;; @0016 v5 = load.i64 notrap aligned v0+32 -;; @0016 v6 = load.i64 notrap aligned v5 -;; @0016 v3 = load.i64 notrap aligned readonly can_move v0+8 -;; @0016 v7 = load.i64 notrap aligned v3+8 -;; @0016 v8 = icmp uge v6, v7 -;; @0016 brif v8, block3, block2(v7) +;; @0016 v3 = load.i64 notrap aligned v0+32 +;; @0016 v4 = load.i64 notrap aligned v3 +;; @0016 v5 = load.i64 notrap aligned readonly can_move v0+8 +;; @0016 v6 = load.i64 notrap aligned v5+8 +;; @0016 v7 = icmp uge v4, v6 +;; @0016 brif v7, block3, block2(v6) ;; ;; block3 cold: -;; @0016 v10 = call fn0(v0) -;; @0016 jump block2(v10) +;; @0016 v9 = call fn0(v0) +;; @0016 jump block2(v9) ;; ;; block2(v21: i64): ;; @0017 jump block4(v21) ;; -;; block4(v13: i64): -;; @0017 v12 = load.i64 notrap aligned v5 -;; @0017 v14 = icmp uge v12, v13 -;; @0017 brif v14, block7, block6(v13) +;; block4(v12: i64): +;; @0017 v11 = load.i64 notrap aligned v3 +;; @0017 v13 = icmp uge v11, v12 +;; @0017 brif v13, block7, block6(v12) ;; ;; block7 cold: -;; @0017 v15 = load.i64 notrap aligned v3+8 -;; @0017 v16 = icmp.i64 uge v12, v15 +;; @0017 v15 = load.i64 notrap aligned v5+8 +;; @0017 v16 = icmp.i64 uge v11, v15 ;; @0017 brif v16, block8, block6(v15) ;; ;; block8 cold: diff --git a/tests/disas/riscv64-component-builtins-asm.wat b/tests/disas/riscv64-component-builtins-asm.wat new file mode 100644 index 0000000000..00c788b3d1 --- /dev/null +++ b/tests/disas/riscv64-component-builtins-asm.wat @@ -0,0 +1,53 @@ +;;! target = "riscv64" +;;! test = 'compile' +;;! filter = '_wasm_call' +;;! objdump = '--funcs all' + +(component + (type $a (resource (rep i32))) + (core func $f (canon resource.drop $a)) + + (core module $m (import "" "" (func (param i32)))) + (core instance (instantiate $m (with "" (instance (export "" (func $f)))))) +) + +;; component-resource-drop[0]_wasm_call: +;; addi sp, sp, -0x10 +;; sd ra, 8(sp) +;; sd s0, 0(sp) +;; mv s0, sp +;; addi sp, sp, -0x10 +;; sd s1, 8(sp) +;; mv s1, a1 +;; lw a1, 0(a0) +;; lui a5, 0x706d7 +;; addi a3, a5, -0x9d +;; beq a1, a3, 8 +;; .byte 0x00, 0x00, 0x00, 0x00 +;; ld a1, 0x10(a0) +;; ld a3, 0(s0) +;; sd a3, 0x18(a1) +;; ld a3, 8(s0) +;; sd a3, 0x20(a1) +;; ld a3, 8(a0) +;; ld a3, 0x10(a3) +;; mv a4, zero +;; slli a1, a4, 0x20 +;; srai a1, a1, 0x20 +;; slli a2, a2, 0x20 +;; srai a2, a2, 0x20 +;; jalr a3 +;; addi a3, zero, -1 +;; beq a0, a3, 0x1c +;; ld s1, 8(sp) +;; addi sp, sp, 0x10 +;; ld ra, 8(sp) +;; ld s0, 0(sp) +;; addi sp, sp, 0x10 +;; ret +;; mv a1, s1 +;; ld a4, 0x10(a1) +;; ld a4, 0x138(a4) +;; mv a0, a1 +;; jalr a4 +;; .byte 0x00, 0x00, 0x00, 0x00 diff --git a/tests/disas/riscv64-component-builtins.wat b/tests/disas/riscv64-component-builtins.wat new file mode 100644 index 0000000000..4ee22b7a3a --- /dev/null +++ b/tests/disas/riscv64-component-builtins.wat @@ -0,0 +1,50 @@ +;;! target = "riscv64" +;;! test = 'optimize' +;;! filter = 'component_trampoline_0_Wasm' + +(component + (type $a (resource (rep i32))) + (core func $f (canon resource.drop $a)) + + (core module $m (import "" "" (func (param i32)))) + (core instance (instantiate $m (with "" (instance (export "" (func $f)))))) +) + +;; function u0:0(i64 vmctx, i64, i32) tail { +;; sig0 = (i64 sext, i32 sext, i32 sext) -> i64 sext system_v +;; sig1 = (i64 sext vmctx) system_v +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; v3 = load.i32 notrap aligned little v0 +;; v17 = iconst.i32 0x706d_6f63 +;; v4 = icmp eq v3, v17 ; v17 = 0x706d_6f63 +;; trapz v4, user1 +;; v5 = load.i64 notrap aligned v0+16 +;; v6 = get_frame_pointer.i64 +;; v7 = load.i64 notrap aligned v6 +;; store notrap aligned v7, v5+24 +;; v8 = get_return_address.i64 +;; store notrap aligned v8, v5+32 +;; v10 = load.i64 notrap aligned readonly v0+8 +;; v11 = load.i64 notrap aligned readonly v10+16 +;; v9 = iconst.i32 0 +;; v12 = call_indirect sig0, v11(v0, v9, v2) ; v9 = 0 +;; v13 = iconst.i64 -1 +;; v14 = icmp ne v12, v13 ; v13 = -1 +;; brif v14, block2, block1 +;; +;; block1 cold: +;; v15 = load.i64 notrap aligned readonly v1+16 +;; v16 = load.i64 notrap aligned readonly v15+312 +;; call_indirect sig1, v16(v1) +;; trap user1 +;; +;; block2: +;; brif.i64 v12, block3, block4 +;; +;; block3: +;; jump block4 +;; +;; block4: +;; return +;; } diff --git a/tests/disas/winch/x64/load/grow_load.wat b/tests/disas/winch/x64/load/grow_load.wat index 4b0bada9f6..dde1b16b81 100644 --- a/tests/disas/winch/x64/load/grow_load.wat +++ b/tests/disas/winch/x64/load/grow_load.wat @@ -65,7 +65,7 @@ ;; movq %r14, %rdi ;; movl 0xc(%rsp), %esi ;; movl $0, %edx -;; callq 0x2d6 +;; callq 0x2ed ;; addq $0xc, %rsp ;; addq $4, %rsp ;; movq 0x58(%rsp), %r14 diff --git a/tests/wast.rs b/tests/wast.rs index fa29447eb8..a5fd4a4cd3 100644 --- a/tests/wast.rs +++ b/tests/wast.rs @@ -18,57 +18,81 @@ fn main() { let mut trials = Vec::new(); - // For each test generate a combinatorial matrix of all configurations to - // run this test in. + let mut add_trial = |test: &WastTest, config: WastConfig| { + let trial = Trial::test( + format!( + "{:?}/{}{}{}", + config.compiler, + if config.pooling { "pooling/" } else { "" }, + if config.collector != Collector::Auto { + format!("{:?}/", config.collector) + } else { + String::new() + }, + test.path.to_str().unwrap() + ), + { + let test = test.clone(); + move || run_wast(&test, config).map_err(|e| format!("{e:?}").into()) + }, + ); + + trials.push(trial); + }; + + // List of supported compilers, filtered by what our current host supports. + let mut compilers = vec![ + Compiler::CraneliftNative, + Compiler::Winch, + Compiler::CraneliftPulley, + ]; + compilers.retain(|c| c.supports_host()); + + // Run each wast test in a few interesting configuration combinations, but + // leave the full combinatorial matrix and such to fuzz testing which + // configures many more settings than those configured here. for test in tests { - let test_uses_gc_types = test.test_uses_gc_types(); - for compiler in [ - Compiler::CraneliftNative, - Compiler::Winch, - Compiler::CraneliftPulley, - ] { - // Skip compilers that have no support for this host. - if !compiler.supports_host() { - continue; - } + let collector = if test.test_uses_gc_types() { + Collector::DeferredReferenceCounting + } else { + Collector::Auto + }; + + // Run this test in all supported compilers. + for compiler in compilers.iter().copied() { + add_trial( + &test, + WastConfig { + compiler, + pooling: false, + collector, + }, + ); + } - for pooling in [true, false] { - let collectors: &[_] = if !pooling && test_uses_gc_types { - &[Collector::DeferredReferenceCounting, Collector::Null] - } else { - &[Collector::Auto] - }; - - for collector in collectors.iter().copied() { - let trial = Trial::test( - format!( - "{compiler:?}/{}{}{}", - if pooling { "pooling/" } else { "" }, - if collector != Collector::Auto { - format!("{collector:?}/") - } else { - String::new() - }, - test.path.to_str().unwrap() - ), - { - let test = test.clone(); - move || { - run_wast( - &test, - WastConfig { - compiler, - pooling, - collector, - }, - ) - .map_err(|e| format!("{e:?}").into()) - } - }, - ); - trials.push(trial); - } - } + let compiler = compilers[0]; + + // Run this test with the pooling allocator under the default compiler. + add_trial( + &test, + WastConfig { + compiler, + pooling: true, + collector, + }, + ); + + // If applicable, also run with the null collector in addition to the + // default collector. + if test.test_uses_gc_types() { + add_trial( + &test, + WastConfig { + compiler, + pooling: false, + collector: Collector::Null, + }, + ); } }