Skip to content
This repository was archived by the owner on Sep 8, 2025. It is now read-only.

Commit 1280db7

Browse files
authored
Merge pull request #144 from alexcrichton/merge
Merge with upstream/main
2 parents c31ea25 + 7e86fd8 commit 1280db7

165 files changed

Lines changed: 6234 additions & 3137 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/main.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ jobs:
8080
with:
8181
submodules: true
8282
- run: |
83-
git ls-files '*.h' '*.c' '*.cpp' | \
83+
git ls-files '*.h' '*.c' '*.cpp' '*.hh' '*.cc' | \
8484
grep -v wasmtime-platform.h | \
8585
grep -v wasm.h | \
8686
xargs clang-format-18 --dry-run --Werror --verbose
@@ -895,9 +895,9 @@ jobs:
895895
- run: |
896896
rustup target add wasm32-wasip1 wasm32-unknown-unknown
897897
cd /tmp
898-
curl -OL https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sdk-24.0-x86_64-linux.tar.gz
899-
tar -xzf wasi-sdk-24.0-x86_64-linux.tar.gz
900-
mv wasi-sdk-24.0-x86_64-linux wasi-sdk
898+
curl -OL https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/wasi-sdk-25.0-x86_64-linux.tar.gz
899+
tar -xzf wasi-sdk-25.0-x86_64-linux.tar.gz
900+
mv wasi-sdk-25.0-x86_64-linux wasi-sdk
901901
- run: |
902902
sudo apt-get update && sudo apt-get install -y gdb lldb-15 llvm
903903
# workaround for https://bugs.launchpad.net/ubuntu/+source/llvm-defaults/+bug/1972855

Cargo.lock

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cranelift/codegen/src/data_value.rs

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ pub enum DataValue {
2323
F128(Ieee128),
2424
V128([u8; 16]),
2525
V64([u8; 8]),
26+
V32([u8; 4]),
27+
V16([u8; 2]),
2628
}
2729

2830
impl PartialEq for DataValue {
@@ -51,6 +53,10 @@ impl PartialEq for DataValue {
5153
(V128(_), _) => false,
5254
(V64(l), V64(r)) => l == r,
5355
(V64(_), _) => false,
56+
(V32(l), V32(r)) => l == r,
57+
(V32(_), _) => false,
58+
(V16(l), V16(r)) => l == r,
59+
(V16(_), _) => false,
5460
}
5561
}
5662
}
@@ -83,13 +89,15 @@ impl DataValue {
8389
DataValue::F128(_) => types::F128,
8490
DataValue::V128(_) => types::I8X16, // A default type.
8591
DataValue::V64(_) => types::I8X8, // A default type.
92+
DataValue::V32(_) => types::I8X4, // A default type.
93+
DataValue::V16(_) => types::I8X2, // A default type.
8694
}
8795
}
8896

8997
/// Return true if the value is a vector (i.e. `DataValue::V128`, `DataValue::V64`, `DataValue::V32` or `DataValue::V16`).
9098
pub fn is_vector(&self) -> bool {
9199
match self {
92-
DataValue::V128(_) | DataValue::V64(_) => true,
100+
DataValue::V128(_) | DataValue::V64(_) | DataValue::V32(_) | DataValue::V16(_) => true,
93101
_ => false,
94102
}
95103
}
@@ -113,6 +121,14 @@ impl DataValue {
113121
v.reverse();
114122
DataValue::V64(v)
115123
}
124+
DataValue::V32(mut v) => {
125+
v.reverse();
126+
DataValue::V32(v)
127+
}
128+
DataValue::V16(mut v) => {
129+
v.reverse();
130+
DataValue::V16(v)
131+
}
116132
}
117133
}
118134

@@ -152,6 +168,8 @@ impl DataValue {
152168
DataValue::F128(f) => dst[..16].copy_from_slice(&f.bits().to_ne_bytes()[..]),
153169
DataValue::V128(v) => dst[..16].copy_from_slice(&v[..]),
154170
DataValue::V64(v) => dst[..8].copy_from_slice(&v[..]),
171+
DataValue::V32(v) => dst[..4].copy_from_slice(&v[..]),
172+
DataValue::V16(v) => dst[..2].copy_from_slice(&v[..]),
155173
};
156174
}
157175

@@ -197,15 +215,13 @@ impl DataValue {
197215
types::F128 => DataValue::F128(Ieee128::with_bits(u128::from_ne_bytes(
198216
src[..16].try_into().unwrap(),
199217
))),
200-
_ if ty.is_vector() => {
201-
if ty.bytes() == 16 {
202-
DataValue::V128(src[..16].try_into().unwrap())
203-
} else if ty.bytes() == 8 {
204-
DataValue::V64(src[..8].try_into().unwrap())
205-
} else {
206-
unimplemented!()
207-
}
208-
}
218+
_ if ty.is_vector() => match ty.bytes() {
219+
16 => DataValue::V128(src[..16].try_into().unwrap()),
220+
8 => DataValue::V64(src[..8].try_into().unwrap()),
221+
4 => DataValue::V32(src[..4].try_into().unwrap()),
222+
2 => DataValue::V16(src[..2].try_into().unwrap()),
223+
_ => unimplemented!(),
224+
},
209225
_ => unimplemented!(),
210226
}
211227
}
@@ -324,6 +340,8 @@ build_conversion_impl!(Ieee64, F64, F64);
324340
build_conversion_impl!(Ieee128, F128, F128);
325341
build_conversion_impl!([u8; 16], V128, I8X16);
326342
build_conversion_impl!([u8; 8], V64, I8X8);
343+
build_conversion_impl!([u8; 4], V32, I8X4);
344+
build_conversion_impl!([u8; 2], V16, I8X2);
327345
impl From<Offset32> for DataValue {
328346
fn from(o: Offset32) -> Self {
329347
DataValue::from(Into::<i32>::into(o))
@@ -346,6 +364,8 @@ impl Display for DataValue {
346364
// Again, for syntax consistency, use ConstantData, which in this case displays as hex.
347365
DataValue::V128(dv) => write!(f, "{}", ConstantData::from(&dv[..])),
348366
DataValue::V64(dv) => write!(f, "{}", ConstantData::from(&dv[..])),
367+
DataValue::V32(dv) => write!(f, "{}", ConstantData::from(&dv[..])),
368+
DataValue::V16(dv) => write!(f, "{}", ConstantData::from(&dv[..])),
349369
}
350370
}
351371
}

cranelift/codegen/src/isa/aarch64/inst/mod.rs

Lines changed: 19 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::{settings, CodegenError, CodegenResult};
1010
use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
1111

1212
use alloc::vec::Vec;
13+
use core::slice;
1314
use smallvec::{smallvec, SmallVec};
1415
use std::fmt::Write;
1516
use std::string::{String, ToString};
@@ -231,31 +232,17 @@ impl Inst {
231232
mem,
232233
flags,
233234
},
234-
F16 => Inst::FpuLoad16 {
235-
rd: into_reg,
236-
mem,
237-
flags,
238-
},
239-
F32 => Inst::FpuLoad32 {
240-
rd: into_reg,
241-
mem,
242-
flags,
243-
},
244-
F64 => Inst::FpuLoad64 {
245-
rd: into_reg,
246-
mem,
247-
flags,
248-
},
249235
_ => {
250236
if ty.is_vector() || ty.is_float() {
251237
let bits = ty_bits(ty);
252238
let rd = into_reg;
253239

254-
if bits == 128 {
255-
Inst::FpuLoad128 { rd, mem, flags }
256-
} else {
257-
assert_eq!(bits, 64);
258-
Inst::FpuLoad64 { rd, mem, flags }
240+
match bits {
241+
128 => Inst::FpuLoad128 { rd, mem, flags },
242+
64 => Inst::FpuLoad64 { rd, mem, flags },
243+
32 => Inst::FpuLoad32 { rd, mem, flags },
244+
16 => Inst::FpuLoad16 { rd, mem, flags },
245+
_ => unimplemented!("gen_load({})", ty),
259246
}
260247
} else {
261248
unimplemented!("gen_load({})", ty);
@@ -287,31 +274,17 @@ impl Inst {
287274
mem,
288275
flags,
289276
},
290-
F16 => Inst::FpuStore16 {
291-
rd: from_reg,
292-
mem,
293-
flags,
294-
},
295-
F32 => Inst::FpuStore32 {
296-
rd: from_reg,
297-
mem,
298-
flags,
299-
},
300-
F64 => Inst::FpuStore64 {
301-
rd: from_reg,
302-
mem,
303-
flags,
304-
},
305277
_ => {
306278
if ty.is_vector() || ty.is_float() {
307279
let bits = ty_bits(ty);
308280
let rd = from_reg;
309281

310-
if bits == 128 {
311-
Inst::FpuStore128 { rd, mem, flags }
312-
} else {
313-
assert_eq!(bits, 64);
314-
Inst::FpuStore64 { rd, mem, flags }
282+
match bits {
283+
128 => Inst::FpuStore128 { rd, mem, flags },
284+
64 => Inst::FpuStore64 { rd, mem, flags },
285+
32 => Inst::FpuStore32 { rd, mem, flags },
286+
16 => Inst::FpuStore16 { rd, mem, flags },
287+
_ => unimplemented!("gen_store({})", ty),
315288
}
316289
} else {
317290
unimplemented!("gen_store({})", ty);
@@ -1123,9 +1096,12 @@ impl MachInst for Inst {
11231096
F64 => Ok((&[RegClass::Float], &[F64])),
11241097
F128 => Ok((&[RegClass::Float], &[F128])),
11251098
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1126-
_ if ty.is_vector() => {
1127-
assert!(ty.bits() <= 128);
1128-
Ok((&[RegClass::Float], &[I8X16]))
1099+
_ if ty.is_vector() && ty.bits() <= 128 => {
1100+
let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
1101+
Ok((
1102+
&[RegClass::Float],
1103+
slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1104+
))
11291105
}
11301106
_ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
11311107
_ => Err(CodegenError::Unsupported(format!(

cranelift/codegen/src/isa/aarch64/lower.isle

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,34 +2546,26 @@
25462546
(rule load_i64_aarch64_uload64 (lower
25472547
(has_type $I64 (load flags address offset)))
25482548
(aarch64_uload64 (amode $I64 address offset) flags))
2549-
(rule (lower
2550-
(has_type $F16 (load flags address offset)))
2551-
(aarch64_fpuload16 (amode $F16 address offset) flags))
2552-
(rule (lower
2553-
(has_type $F32 (load flags address offset)))
2554-
(aarch64_fpuload32 (amode $F32 address offset) flags))
2555-
(rule (lower
2556-
(has_type $F64 (load flags address offset)))
2557-
(aarch64_fpuload64 (amode $F64 address offset) flags))
2558-
(rule (lower
2559-
(has_type $F128 (load flags address offset)))
2560-
(aarch64_fpuload128 (amode $F128 address offset) flags))
25612549
(rule (lower
25622550
(has_type $I128 (load flags address offset)))
25632551
(aarch64_loadp64 (pair_amode address offset) flags))
25642552
(rule -1 (lower
2565-
(has_type (ty_vec64 _)
2566-
(load flags address offset)))
2567-
(aarch64_fpuload64 (amode $F64 address offset) flags))
2568-
(rule -3 (lower
2569-
(has_type (ty_vec128 _)
2570-
(load flags address offset)))
2571-
(aarch64_fpuload128 (amode $I8X16 address offset) flags))
2553+
(has_type (ty_float_or_vec (ty_16 _)) (load flags address offset)))
2554+
(aarch64_fpuload16 (amode $F16 address offset) flags))
25722555
(rule -2 (lower
2556+
(has_type (ty_float_or_vec (ty_32 _)) (load flags address offset)))
2557+
(aarch64_fpuload32 (amode $F32 address offset) flags))
2558+
(rule -3 (lower
2559+
(has_type (ty_float_or_vec (ty_64 _)) (load flags address offset)))
2560+
(aarch64_fpuload64 (amode $F64 address offset) flags))
2561+
(rule -4 (lower
2562+
(has_type (ty_float_or_vec (ty_128 _)) (load flags address offset)))
2563+
(aarch64_fpuload128 (amode $F128 address offset) flags))
2564+
(rule -5 (lower
25732565
(has_type (ty_dyn_vec64 _)
25742566
(load flags address offset)))
25752567
(aarch64_fpuload64 (amode $F64 address offset) flags))
2576-
(rule -4 (lower
2568+
(rule -6 (lower
25772569
(has_type (ty_dyn_vec128 _)
25782570
(load flags address offset)))
25792571
(aarch64_fpuload128 (amode $I8X16 address offset) flags))
@@ -2666,23 +2658,6 @@
26662658
(side_effect
26672659
(aarch64_store32 (amode $I32 address offset) flags value)))
26682660

2669-
(rule (lower
2670-
(store flags value @ (value_type $F16) address offset))
2671-
(side_effect
2672-
(aarch64_fpustore16 (amode $F16 address offset) flags value)))
2673-
(rule (lower
2674-
(store flags value @ (value_type $F32) address offset))
2675-
(side_effect
2676-
(aarch64_fpustore32 (amode $F32 address offset) flags value)))
2677-
(rule (lower
2678-
(store flags value @ (value_type $F64) address offset))
2679-
(side_effect
2680-
(aarch64_fpustore64 (amode $F64 address offset) flags value)))
2681-
(rule (lower
2682-
(store flags value @ (value_type $F128) address offset))
2683-
(side_effect
2684-
(aarch64_fpustore128 (amode $F128 address offset) flags value)))
2685-
26862661
(rule (lower
26872662
(store flags value @ (value_type $I128) address offset))
26882663
(side_effect
@@ -2691,18 +2666,27 @@
26912666
(value_regs_get value 1))))
26922667

26932668
(rule -1 (lower
2694-
(store flags value @ (value_type (ty_vec64 _)) address offset))
2669+
(store flags value @ (value_type (ty_float_or_vec (ty_16 _))) address offset))
26952670
(side_effect
2696-
(aarch64_fpustore64 (amode $F64 address offset) flags value)))
2671+
(aarch64_fpustore16 (amode $F16 address offset) flags value)))
2672+
(rule -2 (lower
2673+
(store flags value @ (value_type (ty_float_or_vec (ty_32 _))) address offset))
2674+
(side_effect
2675+
(aarch64_fpustore32 (amode $F32 address offset) flags value)))
26972676
(rule -3 (lower
2698-
(store flags value @ (value_type (ty_vec128 _)) address offset))
2677+
(store flags value @ (value_type (ty_float_or_vec (ty_64 _))) address offset))
26992678
(side_effect
2700-
(aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
2701-
(rule -2 (lower
2679+
(aarch64_fpustore64 (amode $F64 address offset) flags value)))
2680+
(rule -4 (lower
2681+
(store flags value @ (value_type (ty_float_or_vec (ty_128 _))) address offset))
2682+
(side_effect
2683+
(aarch64_fpustore128 (amode $F128 address offset) flags value)))
2684+
2685+
(rule -5 (lower
27022686
(store flags value @ (value_type (ty_dyn_vec64 _)) address offset))
27032687
(side_effect
27042688
(aarch64_fpustore64 (amode $F64 address offset) flags value)))
2705-
(rule -4 (lower
2689+
(rule -6 (lower
27062690
(store flags value @ (value_type (ty_dyn_vec128 _)) address offset))
27072691
(side_effect
27082692
(aarch64_fpustore128 (amode $I8X16 address offset) flags value)))

cranelift/codegen/src/isa/riscv64/lower.isle

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,6 +2214,12 @@
22142214
(amode AMode (amode addr offset)))
22152215
(vec_store eew (VecAMode.UnitStride amode) src flags (unmasked) ty)))
22162216

2217+
;; Avoid unnecessary moves to floating-point registers for `F16` memory-to-memory copies when
2218+
;; `Zfhmin` is unavailable.
2219+
(rule 3 (lower (store store_flags (sinkable_load inst $F16 load_flags load_addr load_offset) store_addr store_offset))
2220+
(if-let false (has_zfhmin))
2221+
(rv_store (amode store_addr store_offset) (StoreOP.Sh) store_flags (gen_sunk_load inst (amode load_addr load_offset) (LoadOP.Lh) load_flags)))
2222+
22172223

22182224
;;;;; Rules for `icmp`;;;;;;;;;
22192225

cranelift/codegen/src/isa/s390x/abi.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ fn in_int_reg(ty: Type) -> bool {
166166

167167
fn in_flt_reg(ty: Type) -> bool {
168168
match ty {
169-
types::F32 | types::F64 => true,
169+
types::F16 | types::F32 | types::F64 => true,
170170
_ => false,
171171
}
172172
}

0 commit comments

Comments
 (0)