From d3277c005efb72df0f92a9b20992f096477175a1 Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Fri, 16 Jul 2021 11:37:18 -0400 Subject: [PATCH 01/93] =?UTF-8?q?=F0=9F=94=AD=20add=20simple=20entity=20te?= =?UTF-8?q?sts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cranelift/entity/src/lib.rs | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 09054719e0..581a27afaa 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -150,3 +150,72 @@ pub use self::map::SecondaryMap; pub use self::primary::PrimaryMap; pub use self::set::EntitySet; pub use self::sparse::{SparseMap, SparseMapValue, SparseSet}; + +/// A collection of tests to ensure that use of the different `entity_impl!` forms will generate +/// `EntityRef` implementations that behave the same way. +#[cfg(test)] +mod tests { + /// A macro used to emit some basic tests to show that entities behave as we expect. + macro_rules! entity_test { + ($entity:ident) => { + #[test] + fn from_usize_to_u32() { + let e = $entity::new(42); + assert_eq!(e.as_u32(), 42_u32); + } + + #[test] + fn from_u32_to_usize() { + let e = $entity::from_u32(42); + assert_eq!(e.index(), 42_usize); + } + + #[test] + fn comparisons_work() { + let a = $entity::from_u32(42); + let b = $entity::new(42); + assert_eq!(a, b); + } + + #[should_panic] + #[test] + fn cannot_construct_from_reserved_u32() { + use crate::packed_option::ReservedValue; + let reserved = $entity::reserved_value().as_u32(); + let _ = $entity::from_u32(reserved); // panic + } + + #[should_panic] + #[test] + fn cannot_construct_from_reserved_usize() { + use crate::packed_option::ReservedValue; + let reserved = $entity::reserved_value().index(); + let _ = $entity::new(reserved); // panic + } + }; + } + + /// Test cases for a plain ol' `EntityRef` implementation. 
+ mod basic_entity { + use crate::EntityRef; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + struct BasicEntity(u32); + entity_impl!(BasicEntity); + entity_test!(BasicEntity); + } + + /// Test cases for an `EntityRef` implementation that includes a display prefix. + mod prefix_entity { + use crate::EntityRef; + #[derive(Clone, Copy, PartialEq, Eq)] + struct PrefixEntity(u32); + entity_impl!(PrefixEntity, "prefix-"); + entity_test!(PrefixEntity); + + #[test] + fn display_prefix_works() { + let e = PrefixEntity::new(0); + assert_eq!(alloc::format!("{}", e), "prefix-0"); + } + } +} From 2e8f7bacf8d61c3ab49c18d30a871bfc9f7e5ddb Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Fri, 16 Jul 2021 11:35:29 -0400 Subject: [PATCH 02/93] =?UTF-8?q?=F0=9F=8C=88=20provide=20a=20new=20form?= =?UTF-8?q?=20of=20`entity=5Fimpl!`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit N.B. There is likely still some light refactoring to do, so that we are not duplicating so much code. We should also additionally introduce some test coverage. --- cranelift/entity/src/lib.rs | 62 +++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 581a27afaa..2665a1ce25 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -129,6 +129,68 @@ macro_rules! entity_impl { } } }; + + // Alternate form for tuples we can't directly construct; providing "to" and "from" expressions + // to turn an index *into* an entity, or get an index *from* an entity. 
+ ($entity:ident, $display_prefix:expr, $arg:ident, $to_expr:expr, $from_expr:expr) => { + impl $crate::EntityRef for $entity { + #[inline] + fn new(index: usize) -> Self { + debug_assert!(index < ($crate::__core::u32::MAX as usize)); + let $arg = index as u32; + $to_expr + } + + #[inline] + fn index(self) -> usize { + let $arg = self; + $from_expr as usize + } + } + + impl $crate::packed_option::ReservedValue for $entity { + #[inline] + fn reserved_value() -> $entity { + $entity::from_u32($crate::__core::u32::MAX) + } + + #[inline] + fn is_reserved_value(&self) -> bool { + self.as_u32() == $crate::__core::u32::MAX + } + } + + impl $entity { + /// Create a new instance from a `u32`. + #[allow(dead_code)] + #[inline] + pub fn from_u32(x: u32) -> Self { + debug_assert!(x < $crate::__core::u32::MAX); + let $arg = x; + $to_expr + } + + /// Return the underlying index value as a `u32`. + #[allow(dead_code)] + #[inline] + pub fn as_u32(self) -> u32 { + let $arg = self; + $from_expr + } + } + + impl $crate::__core::fmt::Display for $entity { + fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result { + write!(f, concat!($display_prefix, "{}"), self.as_u32()) + } + } + + impl $crate::__core::fmt::Debug for $entity { + fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result { + (self as &dyn $crate::__core::fmt::Display).fmt(f) + } + } + }; } pub mod packed_option; From 87726882dd5c7e64aefa9599f944afdda0585ede Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Fri, 16 Jul 2021 14:21:31 -0400 Subject: [PATCH 03/93] =?UTF-8?q?=E2=9C=85=20add=20test=20cases=20for=20ne?= =?UTF-8?q?w=20`entity=5Fimpl!`=20form?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cranelift/entity/src/lib.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 2665a1ce25..8cddb830dd 100644 --- 
a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -280,4 +280,35 @@ mod tests { assert_eq!(alloc::format!("{}", e), "prefix-0"); } } + + /// Test cases for an `EntityRef` implementation for a type we can only construct through + /// other means, such as calls to `core::convert::From`. + mod other_entity { + mod inner { + #[derive(Clone, Copy, PartialEq, Eq)] + pub struct InnerEntity(u32); + + impl From for InnerEntity { + fn from(x: u32) -> Self { + Self(x) + } + } + + impl From for u32 { + fn from(x: InnerEntity) -> Self { + x.0 + } + } + } + + use {crate::EntityRef, self::inner::InnerEntity}; + entity_impl!(InnerEntity, "inner-", i, InnerEntity::from(i), u32::from(i)); + entity_test!(InnerEntity); + + #[test] + fn display_prefix_works() { + let e = InnerEntity::new(0); + assert_eq!(alloc::format!("{}", e), "inner-0"); + } + } } From 1b9ff6b181183d33c8fa8e13bede8d5f32d2569f Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Fri, 16 Jul 2021 16:41:44 -0400 Subject: [PATCH 04/93] =?UTF-8?q?=F0=9F=A5=93=20rust=20fmt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cranelift/entity/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 8cddb830dd..6dac449083 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -301,7 +301,7 @@ mod tests { } } - use {crate::EntityRef, self::inner::InnerEntity}; + use {self::inner::InnerEntity, crate::EntityRef}; entity_impl!(InnerEntity, "inner-", i, InnerEntity::from(i), u32::from(i)); entity_test!(InnerEntity); From 80d596b055302e6ee70056edd0c326cfebcc10ec Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Fri, 10 Sep 2021 09:32:56 +0100 Subject: [PATCH 05/93] AArch64 LSE atomic_rmw support Rename the existing AtomicRMW to AtomicRMWLoop and directly lower atomic_rmw operations, without a loop if LSE support is available. 
Copyright (c) 2021, Arm Limited --- .../codegen/src/isa/aarch64/inst/emit.rs | 34 +- .../src/isa/aarch64/inst/emit_tests.rs | 355 +++++++++++++++++- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 77 +++- .../codegen/src/isa/aarch64/lower_inst.rs | 47 ++- .../filetests/isa/aarch64/atomic-rmw-lse.clif | 114 ++++++ 5 files changed, 605 insertions(+), 22 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 74ec299bed..198fa67e37 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -504,6 +504,33 @@ fn enc_dmb_ish() -> u32 { 0xD5033BBF } +fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable, rn: Reg) -> u32 { + assert!(machreg_to_gpr(rt.to_reg()) != 31); + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + let op = match op { + AtomicRMWOp::Add => 0b000, + AtomicRMWOp::Clr => 0b001, + AtomicRMWOp::Eor => 0b010, + AtomicRMWOp::Set => 0b011, + AtomicRMWOp::Smax => 0b100, + AtomicRMWOp::Smin => 0b101, + AtomicRMWOp::Umax => 0b110, + AtomicRMWOp::Umin => 0b111, + }; + 0b00_111_000_111_00000_0_000_00_00000_00000 + | (sz << 30) + | (machreg_to_gpr(rs) << 16) + | (op << 12) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt.to_reg()) +} + fn enc_ldar(ty: Type, rt: Writable, rn: Reg) -> u32 { let sz = match ty { I64 => 0b11, @@ -1318,7 +1345,10 @@ impl MachInstEmit for Inst { } => { sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } - &Inst::AtomicRMW { ty, op } => { + &Inst::AtomicRMW { ty, op, rs, rt, rn } => { + sink.put4(enc_ldal(ty, op, rs, rt, rn)); + } + &Inst::AtomicRMWLoop { ty, op } => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1340,7 +1370,7 @@ impl MachInstEmit for Inst { so that we simply write in the destination, the "2nd arg for op". 
*/ // TODO: We should not hardcode registers here, a better idea would be to - // pass some scratch registers in the AtomicRMW pseudo-instruction, and use those + // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those let xzr = zero_reg(); let x24 = xreg(24); let x25 = xreg(25); diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 9e45c6795c..bd4df557ec 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -5887,7 +5887,7 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::AtomicRMW { + Inst::AtomicRMWLoop { ty: I16, op: inst_common::AtomicRmwOp::Xor, }, @@ -5897,6 +5897,359 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Add, + rs: xreg(1), + rt: writable_xreg(2), + rn: xreg(3), + }, + "6200E138", + "ldaddalb w1, w2, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Add, + rs: xreg(4), + rt: writable_xreg(5), + rn: xreg(6), + }, + "C500E478", + "ldaddalh w4, w5, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Add, + rs: xreg(7), + rt: writable_xreg(8), + rn: xreg(9), + }, + "2801E7B8", + "ldaddal w7, w8, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Add, + rs: xreg(10), + rt: writable_xreg(11), + rn: xreg(12), + }, + "8B01EAF8", + "ldaddal x10, x11, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Clr, + rs: xreg(13), + rt: writable_xreg(14), + rn: xreg(15), + }, + "EE11ED38", + "ldclralb w13, w14, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Clr, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5112F078", + "ldclralh w16, w17, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Clr, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + 
"B412F3B8", + "ldclral w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Clr, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1713F6F8", + "ldclral x22, x23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Eor, + rs: xreg(25), + rt: writable_xreg(26), + rn: xreg(27), + }, + "7A23F938", + "ldeoralb w25, w26, [x27]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Eor, + rs: xreg(28), + rt: writable_xreg(29), + rn: xreg(30), + }, + "DD23FC78", + "ldeoralh w28, fp, [lr]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Eor, + rs: xreg(29), + rt: writable_xreg(28), + rn: xreg(27), + }, + "7C23FDB8", + "ldeoral fp, w28, [x27]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Eor, + rs: xreg(26), + rt: writable_xreg(25), + rn: xreg(24), + }, + "1923FAF8", + "ldeoral x26, x25, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Set, + rs: xreg(23), + rt: writable_xreg(22), + rn: xreg(21), + }, + "B632F738", + "ldsetalb w23, w22, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Set, + rs: xreg(20), + rt: writable_xreg(19), + rn: xreg(18), + }, + "5332F478", + "ldsetalh w20, w19, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Set, + rs: xreg(17), + rt: writable_xreg(16), + rn: xreg(15), + }, + "F031F1B8", + "ldsetal w17, w16, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Set, + rs: xreg(14), + rt: writable_xreg(13), + rn: xreg(12), + }, + "8D31EEF8", + "ldsetal x14, x13, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Smax, + rs: xreg(11), + rt: writable_xreg(10), + rn: xreg(9), + }, + "2A41EB38", + "ldsmaxalb w11, w10, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Smax, + rs: xreg(8), + rt: writable_xreg(7), + rn: xreg(6), + }, + "C740E878", 
+ "ldsmaxalh w8, w7, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Smax, + rs: xreg(5), + rt: writable_xreg(4), + rn: xreg(3), + }, + "6440E5B8", + "ldsmaxal w5, w4, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Smax, + rs: xreg(2), + rt: writable_xreg(1), + rn: xreg(0), + }, + "0140E2F8", + "ldsmaxal x2, x1, [x0]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Smin, + rs: xreg(1), + rt: writable_xreg(2), + rn: xreg(3), + }, + "6250E138", + "ldsminalb w1, w2, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Smin, + rs: xreg(4), + rt: writable_xreg(5), + rn: xreg(6), + }, + "C550E478", + "ldsminalh w4, w5, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Smin, + rs: xreg(7), + rt: writable_xreg(8), + rn: xreg(9), + }, + "2851E7B8", + "ldsminal w7, w8, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Smin, + rs: xreg(10), + rt: writable_xreg(11), + rn: xreg(12), + }, + "8B51EAF8", + "ldsminal x10, x11, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Umax, + rs: xreg(13), + rt: writable_xreg(14), + rn: xreg(15), + }, + "EE61ED38", + "ldumaxalb w13, w14, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Umax, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5162F078", + "ldumaxalh w16, w17, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Umax, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + "B462F3B8", + "ldumaxal w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Umax, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1763F6F8", + "ldumaxal x22, x23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Umin, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5172F038", + "lduminalb w16, 
w17, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Umin, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + "B472F378", + "lduminalh w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Umin, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1773F6B8", + "lduminal w22, w23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Umin, + rs: xreg(25), + rt: writable_xreg(26), + rn: xreg(27), + }, + "7A73F9F8", + "lduminal x25, x26, [x27]", + )); + + insns.push(( + Inst::AtomicRMWLoop { ty: I32, op: inst_common::AtomicRmwOp::Xchg, }, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ac4d958bb1..f97cd75ef8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -451,6 +451,19 @@ pub enum VecShiftImmOp { Sshr, } +/// Atomic read-modify-write operations with acquire-release semantics +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum AtomicRMWOp { + Add, + Clr, + Eor, + Set, + Smax, + Smin, + Umax, + Umin, +} + /// An operation on the bits of a register. This can be paired with several instruction formats /// below (see `Inst`) in any combination. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] @@ -775,11 +788,22 @@ pub enum Inst { /// x27 (wr) old value /// x24 (wr) scratch reg; value afterwards has no meaning /// x28 (wr) scratch reg; value afterwards has no meaning - AtomicRMW { + AtomicRMWLoop { ty: Type, // I8, I16, I32 or I64 op: inst_common::AtomicRmwOp, }, + /// An atomic read-modify-write operation. These instructions require the + /// Large System Extension (LSE) ISA support. The instructions have acquire-release + /// semantics. + AtomicRMW { + op: AtomicRMWOp, + rs: Reg, + rt: Writable, + rn: Reg, + ty: Type, + }, + /// An atomic compare-and-swap operation. This instruction is sequentially consistent. 
AtomicCAS { rs: Writable, @@ -788,10 +812,10 @@ pub enum Inst { ty: Type, }, - /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked + /// Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked /// store-conditional loop. /// This instruction is sequentially consistent. - /// Note that the operand conventions, although very similar to AtomicRMW, are different: + /// Note that the operand conventions, although very similar to AtomicRMWLoop, are different: /// /// x25 (rd) address /// x26 (rd) expected value @@ -1919,13 +1943,18 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::CCmpImm { rn, .. } => { collector.add_use(rn); } - &Inst::AtomicRMW { .. } => { + &Inst::AtomicRMWLoop { .. } => { collector.add_use(xreg(25)); collector.add_use(xreg(26)); collector.add_def(writable_xreg(24)); collector.add_def(writable_xreg(27)); collector.add_def(writable_xreg(28)); } + &Inst::AtomicRMW { rs, rt, rn, .. } => { + collector.add_use(rs); + collector.add_def(rt); + collector.add_use(rn); + } &Inst::AtomicCAS { rs, rt, rn, .. } => { collector.add_mod(rs); collector.add_use(rt); @@ -2561,9 +2590,19 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut Inst::CCmpImm { ref mut rn, .. } => { map_use(mapper, rn); } - &mut Inst::AtomicRMW { .. } => { + &mut Inst::AtomicRMWLoop { .. } => { // There are no vregs to map in this insn. } + &mut Inst::AtomicRMW { + ref mut rs, + ref mut rt, + ref mut rn, + .. + } => { + map_use(mapper, rs); + map_def(mapper, rt); + map_use(mapper, rn); + } &mut Inst::AtomicCAS { ref mut rs, ref mut rt, @@ -3617,7 +3656,33 @@ impl Inst { let cond = cond.show_rru(mb_rru); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } - &Inst::AtomicRMW { ty, op, .. 
} => { + &Inst::AtomicRMW { + rs, rt, rn, ty, op + } => { + let op = match op { + AtomicRMWOp::Add => "ldaddal", + AtomicRMWOp::Clr => "ldclral", + AtomicRMWOp::Eor => "ldeoral", + AtomicRMWOp::Set => "ldsetal", + AtomicRMWOp::Smax => "ldsmaxal", + AtomicRMWOp::Umax => "ldumaxal", + AtomicRMWOp::Smin => "ldsminal", + AtomicRMWOp::Umin => "lduminal", + }; + + let size = OperandSize::from_ty(ty); + let rs = show_ireg_sized(rs, mb_rru, size); + let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); + let rn = rn.show_rru(mb_rru); + + let ty_suffix = match ty { + I8 => "b", + I16 => "h", + _ => "", + }; + format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) + } + &Inst::AtomicRMWLoop { ty, op, .. } => { format!( "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}", ty.bits(), op) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 10c6807555..c743f642a4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1529,20 +1529,41 @@ pub(crate) fn lower_insn_to_regs>( let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty_access)); - // Make sure that both args are in virtual regs, since in effect - // we have to do a parallel copy to get them safely to the AtomicRMW input - // regs, and that's not guaranteed safe if either is in a real reg. 
- r_addr = ctx.ensure_in_vreg(r_addr, I64); - r_arg2 = ctx.ensure_in_vreg(r_arg2, I64); - // Move the args to the preordained AtomicRMW input regs - ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); - ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); - // Now the AtomicRMW insn itself + let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap()); - ctx.emit(Inst::AtomicRMW { ty: ty_access, op }); - // And finally, copy the preordained AtomicRMW output reg to its destination. - ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); - // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that. + let lse_op = match op { + AtomicRmwOp::Add => Some(AtomicRMWOp::Add), + AtomicRmwOp::And => Some(AtomicRMWOp::Clr), + AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor), + AtomicRmwOp::Or => Some(AtomicRMWOp::Set), + AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax), + AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax), + AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin), + AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin), + _ => None + }; + if isa_flags.use_lse() && lse_op.is_some() { + ctx.emit(Inst::AtomicRMW { + op: lse_op.unwrap(), + rs: r_arg2, + rt: r_dst, + rn: r_addr, + ty: ty_access, + }); + } else { + // Make sure that both args are in virtual regs, since in effect + // we have to do a parallel copy to get them safely to the AtomicRMW input + // regs, and that's not guaranteed safe if either is in a real reg. + r_addr = ctx.ensure_in_vreg(r_addr, I64); + r_arg2 = ctx.ensure_in_vreg(r_arg2, I64); + // Move the args to the preordained AtomicRMW input regs + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); + ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op }); + // And finally, copy the preordained AtomicRMW output reg to its destination. + ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); + // Also, x24 and x28 are trashed. 
`fn aarch64_get_regs` must mention that. + } } Opcode::AtomicCas => { diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif new file mode 100644 index 0000000000..9157c99977 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -0,0 +1,114 @@ +test compile +target aarch64 has_lse + +function %atomic_rmw_add_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return +} +; check: ldaddal x1, x0, [x0] + +function %atomic_rmw_add_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return +} +; check: ldaddal w1, w0, [x0] + +function %atomic_rmw_and_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return +} +; check: ldclral x1, x0, [x0] + +function %atomic_rmw_and_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return +} +; check: ldclral w1, w0, [x0] + +function %atomic_rmw_or_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return +} +; check: ldsetal x1, x0, [x0] + +function %atomic_rmw_or_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return +} +; check: ldsetal w1, w0, [x0] + +function %atomic_rmw_xor_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return +} +; check: ldeoral x1, x0, [x0] + +function %atomic_rmw_xor_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 xor v0, v1 + return +} +; check: ldeoral w1, w0, [x0] + +function %atomic_rmw_smax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smax v0, v1 + return +} +; check: ldsmaxal x1, x0, [x0] + +function %atomic_rmw_smax_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 smax v0, v1 + return +} +; check: ldsmaxal w1, w0, [x0] + +function %atomic_rmw_umax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umax v0, v1 + return +} +; check: 
ldumaxal x1, x0, [x0] + +function %atomic_rmw_umax_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 umax v0, v1 + return +} +; check: ldumaxal w1, w0, [x0] + +function %atomic_rmw_smin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smin v0, v1 + return +} +; check: ldsminal x1, x0, [x0] + +function %atomic_rmw_smin_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 smin v0, v1 + return +} +; check: ldsminal w1, w0, [x0] + +function %atomic_rmw_umin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umin v0, v1 + return +} +; check: lduminal x1, x0, [x0] + +function %atomic_rmw_umin_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 umin v0, v1 + return +} +; check: lduminal w1, w0, [x0] From 7da76f06017820554710c848812e9ac462d775e4 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 14 Sep 2021 15:05:40 +0100 Subject: [PATCH 06/93] cargo fmt --- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 8 +++----- cranelift/codegen/src/isa/aarch64/lower_inst.rs | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index f97cd75ef8..d95a92cf91 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -794,8 +794,8 @@ pub enum Inst { }, /// An atomic read-modify-write operation. These instructions require the - /// Large System Extension (LSE) ISA support. The instructions have acquire-release - /// semantics. + /// Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + /// acquire-release semantics. 
AtomicRMW { op: AtomicRMWOp, rs: Reg, @@ -3656,9 +3656,7 @@ impl Inst { let cond = cond.show_rru(mb_rru); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } - &Inst::AtomicRMW { - rs, rt, rn, ty, op - } => { + &Inst::AtomicRMW { rs, rt, rn, ty, op } => { let op = match op { AtomicRMWOp::Add => "ldaddal", AtomicRMWOp::Clr => "ldclral", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c743f642a4..af549e43a4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1540,7 +1540,7 @@ pub(crate) fn lower_insn_to_regs>( AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax), AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin), AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin), - _ => None + _ => None, }; if isa_flags.use_lse() && lse_op.is_some() { ctx.emit(Inst::AtomicRMW { From 62a317773ab5501085c7c0c0154b18ebc71e33ac Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 15 Sep 2021 16:07:36 +0100 Subject: [PATCH 07/93] added aarch64 has_lse targets to atomic-rmw run tests --- cranelift/filetests/filetests/runtests/atomic-rmw-2.clif | 1 + cranelift/filetests/filetests/runtests/atomic-rmw.clif | 1 + 2 files changed, 2 insertions(+) diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif index 2213c72be3..f48f4a953e 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif @@ -1,5 +1,6 @@ test run target aarch64 +target aarch64 has_lse target x86_64 machinst ; TODO: Merge this with atomic-rmw.clif when s390x supports it diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw.clif b/cranelift/filetests/filetests/runtests/atomic-rmw.clif index eb9ed2c4d3..f96f645c33 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw.clif @@ -1,5 +1,6 @@ test 
run target aarch64 +target aarch64 has_lse target x86_64 machinst target s390x From 224a4b4094b19819a4f08b7cf75e58db5e9a4256 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Tue, 14 Sep 2021 12:28:21 +0100 Subject: [PATCH 08/93] Implement `VhighBits` & `Vselect` for interpreter Implemented the following Opcodes for the Cranelift interpreter: - `VhighBits` to reduce a vector to a scalar integer formed by concatenating the MSB of each lane. - `Vselect` to select lanes from two vectors controlled by a boolean vector. Copyright (c) 2021, Arm Limited --- .../filetests/runtests/simd-vhighbits.clif | 53 +++++++++++++++++++ .../filetests/runtests/simd-vselect.clif | 29 ++++++++++ cranelift/interpreter/src/step.rs | 31 ++++++++++- 3 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-vhighbits.clif diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif new file mode 100644 index 0000000000..2a9c5d1a75 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -0,0 +1,53 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %vhighbits_i8x16(i8x16) -> i16 { +block0(v0: i8x16): + v1 = vhigh_bits.i16 v0 + return v1 +} +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7]) == 0 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128]) == -1 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 8]) == 32767 + +function %vhighbits_i16x8(i16x8) -> i8 { +block0(v0: i16x8): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 1]) == 0 +; run: %vhighbits_i16x8([1 2 3 4 5 6 7 8]) == 0 +; run: %vhighbits_i16x8([128 128 128 
128 128 128 128 128]) == 0 +; run: %vhighbits_i16x8([32768 32768 32768 32768 32768 32768 32768 0]) == 127 + + +function %vhighbits_i32x4(i32x4) -> i8 { +block0(v0: i32x4): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i32x4([0 0 0 0]) == 0 +; run: %vhighbits_i32x4([0 0 0 1]) == 0 +; run: %vhighbits_i32x4([1 2 3 4]) == 0 +; run: %vhighbits_i32x4([128 128 128 128]) == 0 +; run: %vhighbits_i32x4([2147483648 2147483648 2147483648 2147483648]) == 15 +; run: %vhighbits_i32x4([2147483648 0 2147483648 0]) == 5 + + +function %vhighbits_i64x2(i64x2) -> i8 { +block0(v0: i64x2): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i64x2([0 0]) == 0 +; run: %vhighbits_i64x2([0 1]) == 0 +; run: %vhighbits_i64x2([1 2]) == 0 +; run: %vhighbits_i64x2([128 128]) == 0 +; run: %vhighbits_i64x2([18446744073709551615 18446744073709551615]) == 3 +; run: %vhighbits_i64x2([18446744073709551615 0]) == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 3817b2302f..8a78664f5b 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -1,3 +1,4 @@ +test interpret test run ; target s390x TODO: Not yet implemented on s390x target aarch64 @@ -45,3 +46,31 @@ block0: return v4 } ; run: %vselect_i64x2() == [200 101] + +function %vselect_p_i8x16(b8x16, i8x16, i8x16) -> i8x16 { +block0(v0: b8x16, v1: i8x16, v2: i8x16): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i8x16([true false true true true false false false true false true true true false false false], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [1 18 3 4 5 22 23 24 9 26 11 12 13 30 31 32] + +function %vselect_p_i16x8(b16x8, i16x8, i16x8) -> i16x8 { +block0(v0: b16x8, v1: i16x8, v2: i16x8): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i16x8([true false true true true false false false], 
[1 2 3 4 5 6 7 8], [17 18 19 20 21 22 23 24]) == [1 18 3 4 5 22 23 24] + +function %vselect_p_i32x4(b32x4, i32x4, i32x4) -> i32x4 { +block0(v0: b32x4, v1: i32x4, v2: i32x4): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i32x4([true false true true], [1 2 3 4], [100000 200000 300000 400000]) == [1 200000 3 4] + +function %vselect_p_i64x2(b64x2, i64x2, i64x2) -> i64x2 { +block0(v0: b64x2, v1: i64x2, v2: i64x2): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i64x2([true false], [1 2], [100000000000 200000000000]) == [1 200000000000] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 6b3eb59ff8..32457d4462 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -824,10 +824,37 @@ where let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; assign(lanes[idx].clone()) } - Opcode::VhighBits => unimplemented!("VhighBits"), + Opcode::VhighBits => { + // `ctrl_ty` controls the return type for this, so the input type + // must be retrieved via `inst_context`. + let lane_type = inst_context + .type_of(inst_context.args()[0]) + .unwrap() + .lane_type(); + let a = extractlanes(&arg(0)?, lane_type)?; + let mut result: i128 = 0; + for (i, val) in a.into_iter().enumerate() { + let val = val.reverse_bits()?.into_int()?; // MSB -> LSB + result |= (val & 1) << i; + } + assign(Value::int(result, ctrl_ty)?) + } Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vconcat => unimplemented!("Vconcat"), - Opcode::Vselect => unimplemented!("Vselect"), + Opcode::Vselect => { + let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) { + if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? 
{ + new_vec.push(y); + } else { + new_vec.push(x); + } + } + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } Opcode::VanyTrue => assign(fold_vector( arg(0)?, ctrl_ty, From 83c3bc5b9d72961cb80beb3bec30e631e50f5608 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 8 Sep 2021 17:04:05 +0100 Subject: [PATCH 09/93] Implement `Unarrow`, `Uunarrow`, and `Snarrow` for the interpreter Implemented the following Opcodes for the Cranelift interpreter: - `Unarrow` to combine two SIMD vectors into a new vector with twice the lanes but half the width, with signed inputs which are clamped to `0x00`. - `Uunarrow` to perform the same operation as `Unarrow` but treating inputs as unsigned. - `Snarrow` to perform the same operation as `Unarrow` but treating both inputs and outputs as signed, and saturating accordingly. Note that all 3 instructions saturate at the type boundaries. Copyright (c) 2021, Arm Limited --- .../codegen/meta/src/shared/instructions.rs | 2 +- cranelift/codegen/src/ir/types.rs | 23 ++++++++++++ .../runtests/simd-snarrow-aarch64.clif | 11 ++++++ .../filetests/runtests/simd-snarrow.clif | 19 ++++++++++ .../runtests/simd-unarrow-aarch64.clif | 11 ++++++ .../filetests/runtests/simd-unarrow.clif | 19 ++++++++++ .../filetests/runtests/simd-uunarrow.clif | 26 ++++++++++++++ cranelift/interpreter/src/step.rs | 35 ++++++++++++++----- cranelift/interpreter/src/value.rs | 25 +++++++++++++ 9 files changed, 161 insertions(+), 10 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-snarrow.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-unarrow.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-uunarrow.clif diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 955782039d..1437a2d86d 
100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -4048,7 +4048,7 @@ pub(crate) fn define( Combine `x` and `y` into a vector with twice the lanes but half the integer width while saturating overflowing values to the unsigned maximum and minimum. - Note that all input lanes are considered unsigned. + Note that all input lanes are considered unsigned: any negative values will be interpreted as unsigned, overflowing and being replaced with the unsigned maximum. The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index 4284021190..2d9c7e709e 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -79,6 +79,29 @@ impl Type { } } + /// Get the (minimum, maximum) values represented by each lane in the type. + pub fn bounds(self, signed: bool) -> (i128, i128) { + if signed { + match self.lane_type() { + I8 => (i8::MIN as i128, i8::MAX as i128), + I16 => (i16::MIN as i128, i16::MAX as i128), + I32 => (i32::MIN as i128, i32::MAX as i128), + I64 => (i64::MIN as i128, i64::MAX as i128), + I128 => (i128::MIN, i128::MAX), + _ => unimplemented!(), + } + } else { + match self.lane_type() { + I8 => (u8::MIN as i128, u8::MAX as i128), + I16 => (u16::MIN as i128, u16::MAX as i128), + I32 => (u32::MIN as i128, u32::MAX as i128), + I64 => (u64::MIN as i128, u64::MAX as i128), + I128 => (u128::MIN as i128, u128::MAX as i128), + _ => unimplemented!(), + } + } + } + /// Get an integer type with the requested number of bits. 
pub fn int(bits: u16) -> Option { match bits { diff --git a/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif new file mode 100644 index 0000000000..84c3de5d05 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif @@ -0,0 +1,11 @@ +test interpret +test run +target aarch64 +; x86_64 considers the case `i64x2` -> `i32x4` to be 'unreachable' + +function %snarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, v1: i64x2): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 -100000 2147483647 73] diff --git a/cranelift/filetests/filetests/runtests/simd-snarrow.clif b/cranelift/filetests/filetests/runtests/simd-snarrow.clif new file mode 100644 index 0000000000..18d667f743 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-snarrow.clif @@ -0,0 +1,19 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %snarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i16x8([1 127 128 15 32767 -32 48 0], [8 255 -100 100 -32768 73 80 42]) == [1 127 127 15 127 -32 48 0 8 127 -100 100 -128 73 80 42] + +function %snarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i32x4([32767 1048575 -70000 -5], [268435455 73 268435455 42]) == [32767 32767 -32768 -5 32767 73 32767 42] diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif new file mode 100644 index 0000000000..478a1860aa --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif @@ -0,0 +1,11 @@ +test interpret +test run +target aarch64 +; x86_64 considers the case `i64x2 -> i32x4` to be 'unreachable' + +function %unarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, 
v1: i64x2): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 0 4294967295 73] diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow.clif b/cranelift/filetests/filetests/runtests/simd-unarrow.clif new file mode 100644 index 0000000000..0725afd811 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-unarrow.clif @@ -0,0 +1,19 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %unarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 0 0 48 0 8 255 0 100 0 73 80 42] + +function %unarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 0 0 65535 73 65535 42] diff --git a/cranelift/filetests/filetests/runtests/simd-uunarrow.clif b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif new file mode 100644 index 0000000000..b2a68c4480 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif @@ -0,0 +1,26 @@ +test interpret +test run +target aarch64 +; x86_64 panics: `Did not match fcvt input! 
+; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10` + +function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 255 255 48 0 8 255 255 100 255 73 80 42] + +function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 65535 65535 65535 73 65535 42] + +function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, v1: i64x2): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 4294967295 4294967295 73] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 25ec8b2878..6a86eb1cb6 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -779,19 +779,36 @@ where arg(0)?, ValueConversionKind::Exact(ctrl_ty), )?), - Opcode::Snarrow => assign(Value::convert( - arg(0)?, - ValueConversionKind::Truncate(ctrl_ty), - )?), + Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => { + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + let new_type = ctrl_ty.split_lanes().unwrap(); + let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow); + let mut min: V = Value::int(min, ctrl_ty.lane_type())?; + let mut max: V = Value::int(max, ctrl_ty.lane_type())?; + if inst.opcode() == Opcode::Uunarrow { + min = min.convert(ValueConversionKind::ToUnsigned)?; + max = max.convert(ValueConversionKind::ToUnsigned)?; + } + for mut lane in arg0.into_iter().chain(arg1) { + if inst.opcode() == Opcode::Uunarrow { + lane = 
lane.convert(ValueConversionKind::ToUnsigned)?; + } + lane = Value::max(lane, min.clone())?; + lane = Value::min(lane, max.clone())?; + lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?; + if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow { + lane = lane.convert(ValueConversionKind::ToUnsigned)?; + } + new_vec.push(lane); + } + assign(vectorizelanes(&new_vec, new_type)?) + } Opcode::Sextend => assign(Value::convert( arg(0)?, ValueConversionKind::SignExtend(ctrl_ty), )?), - Opcode::Unarrow => assign(Value::convert( - arg(0)?, - ValueConversionKind::Truncate(ctrl_ty), - )?), - Opcode::Uunarrow => unimplemented!("Uunarrow"), Opcode::Uextend => assign(Value::convert( arg(0)?, ValueConversionKind::ZeroExtend(ctrl_ty), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 768ccfe8e2..b898707199 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -26,6 +26,9 @@ pub trait Value: Clone + From { fn convert(self, kind: ValueConversionKind) -> ValueResult; fn concat(self, other: Self) -> ValueResult; + fn max(self, other: Self) -> ValueResult; + fn min(self, other: Self) -> ValueResult; + // Comparison. fn eq(&self, other: &Self) -> ValueResult; fn gt(&self, other: &Self) -> ValueResult; @@ -302,11 +305,17 @@ impl Value for DataValue { Self::from_integer(extracted, ty)? 
} ValueConversionKind::SignExtend(ty) => match (self, ty) { + (DataValue::U8(n), types::I16) => DataValue::U16(n as u16), + (DataValue::U8(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U8(n), types::I64) => DataValue::U64(n as u64), (DataValue::I8(n), types::I16) => DataValue::I16(n as i16), (DataValue::I8(n), types::I32) => DataValue::I32(n as i32), (DataValue::I8(n), types::I64) => DataValue::I64(n as i64), + (DataValue::U16(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U16(n), types::I64) => DataValue::U64(n as u64), (DataValue::I16(n), types::I32) => DataValue::I32(n as i32), (DataValue::I16(n), types::I64) => DataValue::I64(n as i64), + (DataValue::U32(n), types::I64) => DataValue::U64(n as u64), (DataValue::I32(n), types::I64) => DataValue::I64(n as i64), (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), }, @@ -362,6 +371,22 @@ impl Value for DataValue { } } + fn max(self, other: Self) -> ValueResult { + if Value::gt(&self, &other)? { + Ok(self) + } else { + Ok(other) + } + } + + fn min(self, other: Self) -> ValueResult { + if Value::lt(&self, &other)? { + Ok(self) + } else { + Ok(other) + } + } + fn eq(&self, other: &Self) -> ValueResult { comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, U8, U16, U32, U64, F32, F64]) } From 3b9bfc818731d7566279e654ea46525ec6e9e509 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Mon, 6 Sep 2021 15:03:32 +0100 Subject: [PATCH 10/93] Implement `WideningPairwiseDotProductS` for interpreter Implemented `WideningPairwiseDotProductS` to perform sign-extending length-doubling multiplication on corresponding elements from two `i16x8` SIMD vectors, performing a pairwise add on the results (thus returning `i32x4`). 
Copyright (c) 2021, Arm Limited --- .../simd-wideningpairwisedotproducts.clif | 14 ++++++++++++++ cranelift/interpreter/src/step.rs | 16 +++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif diff --git a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif new file mode 100644 index 0000000000..56987ef79d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif @@ -0,0 +1,14 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %wpdps(i16x8, i16x8) -> i32x4 { +block0(v0: i16x8, v1: i16x8): + v2 = widening_pairwise_dot_product_s v0, v1 + return v2 +} +; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000] +; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768] +; run: %wpdps([-32768 -32768 32767 32767 -32768 -32768 32767 32767], [-32768 -32768 32767 32767 32767 32767 -32768 -32768]) == [2147483648 2147352578 -2147418112 -2147418112] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index ce53528c20..5157084330 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -845,7 +845,21 @@ where Opcode::AtomicLoad => unimplemented!("AtomicLoad"), Opcode::AtomicStore => unimplemented!("AtomicStore"), Opcode::Fence => unimplemented!("Fence"), - Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), + Opcode::WideningPairwiseDotProductS => { + let ctrl_ty = types::I16X8; + let new_type = ctrl_ty.merge_lanes().unwrap(); + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + for (x, y) 
in arg0.chunks(2).into_iter().zip(arg1.chunks(2).into_iter()) { + let mut z = 0i128; + for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { + z += lhs.clone().into_int()? * rhs.clone().into_int()?; + } + new_vec.push(Value::int(z, new_type.lane_type())?); + } + assign(vectorizelanes(&new_vec, new_type)?) + } Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), From 8115e7252d27ba67a128bdca529d50ba8490e70d Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 14 Sep 2021 15:21:37 +0100 Subject: [PATCH 11/93] cranelift: Add support for i128 immediates in parser --- cranelift/codegen/src/data_value.rs | 2 ++ cranelift/reader/src/parser.rs | 49 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index 965ce1bec5..13aa237674 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -86,6 +86,7 @@ impl DataValue { DataValue::I16(i) => dst[..2].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I32(i) => dst[..4].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I64(i) => dst[..8].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I128(i) => dst[..16].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::V128(v) => dst[..16].copy_from_slice(&v[..]), @@ -104,6 +105,7 @@ impl DataValue { types::I16 => DataValue::I16(i16::from_ne_bytes(src[..2].try_into().unwrap())), types::I32 => DataValue::I32(i32::from_ne_bytes(src[..4].try_into().unwrap())), types::I64 => DataValue::I64(i64::from_ne_bytes(src[..8].try_into().unwrap())), + types::I128 => DataValue::I128(i128::from_ne_bytes(src[..16].try_into().unwrap())), types::F32 => DataValue::F32(Ieee32::with_bits(u32::from_ne_bytes( 
src[..4].try_into().unwrap(), ))), diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 97b4a7c77d..a0e5a35e3a 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -921,6 +921,49 @@ impl<'a> Parser<'a> { } } + // Match and consume an i128 immediate. + fn match_imm128(&mut self, err_msg: &str) -> ParseResult { + if let Some(Token::Integer(text)) = self.token() { + self.consume(); + let negative = text.starts_with('-'); + let positive = text.starts_with('+'); + let text = if negative || positive { + // Strip sign prefix. + &text[1..] + } else { + text + }; + + // Parse the text value; the lexer gives us raw text that looks like an integer. + let value = if text.starts_with("0x") { + // Skip underscores. + let text = text.replace("_", ""); + // Parse it as a u128 in hexadecimal form. + u128::from_str_radix(&text[2..], 16) + .map_err(|_| self.error("unable to parse i128 as a hexadecimal immediate"))? + } else { + // Parse it as a u128 to check for overflow and other issues. + text.parse() + .map_err(|_| self.error("expected i128 decimal immediate"))? + }; + + // Apply sign if necessary. + let signed = if negative { + let value = value.wrapping_neg() as i128; + if value > 0 { + return Err(self.error("negative number too small")); + } + value + } else { + value as i128 + }; + + Ok(signed) + } else { + err!(self.loc, err_msg) + } + } + // Match and consume an optional offset32 immediate. 
// // Note that this will match an empty string as an empty offset, and that if an offset is @@ -2805,6 +2848,7 @@ impl<'a> Parser<'a> { I16 => DataValue::from(self.match_imm16("expected an i16")?), I32 => DataValue::from(self.match_imm32("expected an i32")?), I64 => DataValue::from(Into::::into(self.match_imm64("expected an i64")?)), + I128 => DataValue::from(self.match_imm128("expected an i128")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), _ if ty.is_vector() => { @@ -4126,6 +4170,11 @@ mod tests { assert_eq!(parse("16", I16).to_string(), "16"); assert_eq!(parse("32", I32).to_string(), "32"); assert_eq!(parse("64", I64).to_string(), "64"); + assert_eq!( + parse("0x01234567_01234567_01234567_01234567", I128).to_string(), + "1512366032949150931280199141537564007" + ); + assert_eq!(parse("1234567", I128).to_string(), "1234567"); assert_eq!(parse("0x32.32", F32).to_string(), "0x1.919000p5"); assert_eq!(parse("0x64.64", F64).to_string(), "0x1.9190000000000p6"); assert_eq!(parse("true", B1).to_string(), "true"); From eae1b2d246dafcb1c1ed05817e33372927b0a773 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 15 Sep 2021 15:18:04 +0100 Subject: [PATCH 12/93] cranelift: Update i128 tests to use i128 values in functions --- .../filetests/runtests/i128-arithmetic.clif | 101 ++- .../filetests/runtests/i128-bandnot.clif | 14 + .../filetests/runtests/i128-bint.clif | 8 +- .../filetests/runtests/i128-bitops-cls.clif | 24 - .../filetests/runtests/i128-bitops-count.clif | 93 +-- .../filetests/runtests/i128-bitops.clif | 179 ++--- .../filetests/runtests/i128-bitrev.clif | 15 + .../filetests/runtests/i128-bornot.clif | 14 + .../filetests/filetests/runtests/i128-br.clif | 48 +- .../filetests/runtests/i128-bricmp.clif | 432 +++++------ .../filetests/runtests/i128-bxornot.clif | 14 + .../filetests/runtests/i128-cls.clif | 18 + .../filetests/runtests/i128-const.clif | 8 +- 
.../filetests/runtests/i128-extend-2.clif | 73 +- .../filetests/runtests/i128-extend.clif | 26 +- .../runtests/i128-icmp-overflow.clif | 72 +- .../filetests/runtests/i128-icmp.clif | 283 ++++---- .../filetests/runtests/i128-load-store.clif | 173 +++-- .../filetests/runtests/i128-reduce.clif | 61 +- .../filetests/runtests/i128-rotate.clif | 83 +-- .../filetests/runtests/i128-select.clif | 24 +- .../runtests/i128-shifts-small-types.clif | 138 ++-- .../filetests/runtests/i128-shifts.clif | 675 ++++++++---------- 23 files changed, 1164 insertions(+), 1412 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/i128-bandnot.clif delete mode 100644 cranelift/filetests/filetests/runtests/i128-bitops-cls.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-bornot.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-bxornot.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-cls.clif diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif index e5bbd08f6f..f891239155 100644 --- a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif @@ -1,74 +1,57 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x target x86_64 machinst -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser - -function %add_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = iadd v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %add_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = iadd v0, v1 + return v2 } -; run: %add_i128(0, 0, 0, 0) == [0, 0] -; run: %add_i128(0, -1, -1, 0) == [-1, -1] -; run: %add_i128(1, 0, 0, 0) == [1, 0] -; run: %add_i128(1, 0, 1, 0) == [2, 0] -; run: 
%add_i128(1, 0, -1, -1) == [0, 0] -; run: %add_i128(-1, 0, 1, 0) == [0, 1] +; run: %add_i128(0, 0) == 0 +; run: %add_i128(1, 0) == 1 +; run: %add_i128(1, 1) == 2 +; run: %add_i128(1, -1) == 0 +; run: %add_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == -1 +; run: %add_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == 0x00000000_00000001_00000000_00000000 -; run: %add_i128(0x01234567_89ABCDEF, 0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [-1, -1] -; run: %add_i128(0x06060606_06060606, 0xA00A00A0_0A00A00A, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x36363636_36363636, 0xABBABBAB_BABBABBA] -; run: %add_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF] +; run: %add_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == -1 +; run: %add_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == 0x36363636_36363636_ABBABBAB_BABBABBA +; run: %add_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == 0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF -function %sub_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = isub v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %sub_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = isub v0, v1 + return v2 } -; run: %sub_i128(0, 0, 0, 0) == [0, 0] -; run: %sub_i128(1, 0, 1, 0) == [0, 0] -; run: %sub_i128(1, 0, 0, 0) == [1, 0] -; run: %sub_i128(0, 0, 1, 0) == [-1, -1] -; run: %sub_i128(0, 0, -1, -1) == [1, 0] +; run: %sub_i128(0, 0) == 0 +; run: %sub_i128(1, 1) == 0 +; run: %sub_i128(1, 0) == 1 +; run: %sub_i128(0, 1) == -1 +; run: %sub_i128(0, -1) == 1 -; run: %sub_i128(-1, -1, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [0x01234567_89ABCDEF, 0x01234567_89ABCDEF] -; run: %sub_i128(0x36363636_36363636, 
0xABBABBAB_BABBABBA, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x06060606_06060606, 0xA00A00A0_0A00A00A] -; run: %sub_i128(0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE] +; run: %sub_i128(-1, 0xFEDCBA98_76543210_FEDCBA98_76543210) == 0x01234567_89ABCDEF_01234567_89ABCDEF +; run: %sub_i128(0x36363636_36363636_ABBABBAB_BABBABBA, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == 0x06060606_06060606_A00A00A0_0A00A00A +; run: %sub_i128(0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == 0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE -function %mul_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = imul v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %mul_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = imul v0, v1 + return v2 } -; run: %mul_i128(0, 0, 0, 0) == [0, 0] -; run: %mul_i128(1, 0, 1, 0) == [1, 0] -; run: %mul_i128(1, 0, 0, 0) == [0, 0] -; run: %mul_i128(0, 0, 1, 0) == [0, 0] -; run: %mul_i128(2, 0, 1, 0) == [2, 0] -; run: %mul_i128(2, 0, 2, 0) == [4, 0] -; run: %mul_i128(1, 0, -1, -1) == [-1, -1] -; run: %mul_i128(2, 0, -1, -1) == [-2, -1] +; run: %mul_i128(0, 0) == 0 +; run: %mul_i128(1, 1) == 1 +; run: %mul_i128(1, 0) == 0 +; run: %mul_i128(0, 1) == 0 +; run: %mul_i128(2, 1) == 2 +; run: %mul_i128(2, 2) == 4 +; run: %mul_i128(1, -1) == -1 +; run: %mul_i128(2, -1) == -2 -; run: %mul_i128(0x01010101_01010101, 0x01010101_01010101, 13, 0) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D] -; run: %mul_i128(13, 0, 0x01010101_01010101, 0x01010101_01010101) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D] -; run: %mul_i128(0x00000000_01234567, 0x89ABCDEF_00000000, 0x00000000_FEDCBA98, 0x76543210_00000000) == [0x0121FA00_23E20B28, 0xE2946058_00000000] -; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == 
[0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E] -; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E] +; run: %mul_i128(0x01010101_01010101_01010101_01010101, 13) == 0x0D0D0D0D_0D0D0D0D_0D0D0D0D_0D0D0D0D +; run: %mul_i128(13, 0x01010101_01010101_01010101_01010101) == 0x0D0D0D0D_0D0D0D0D_0D0D0D0D_0D0D0D0D +; run: %mul_i128(0x00000000_01234567_89ABCDEF_00000000, 0x00000000_FEDCBA98_76543210_00000000) == 0x2236D88F_E5618CF0_00000000_00000000 +; run: %mul_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF) == 0x5ECD38B5_9D1C2B7E_DB6B1E48_19BA1112 diff --git a/cranelift/filetests/filetests/runtests/i128-bandnot.clif b/cranelift/filetests/filetests/runtests/i128-bandnot.clif new file mode 100644 index 0000000000..bb2a4ba2ba --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bandnot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %band_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band_not v0, v1 + return v2 +} +; run: %band_not_i128(0, 0) == 0 +; run: %band_not_i128(-1, 0) == -1 +; run: %band_not_i128(-1, -1) == 0 +; run: %band_not_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %band_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0xFEDCBA98_76543210_01234567_89ABCDEF +; run: %band_not_i128(0xFEEEFFFF_FEEEFFFF_F1FFFEFE_F1FFFEFE, 0x20240000_20240000_31001010_31001010) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-bint.clif b/cranelift/filetests/filetests/runtests/i128-bint.clif index 4c0ed4f890..f959af583b 100644 --- a/cranelift/filetests/filetests/runtests/i128-bint.clif +++ b/cranelift/filetests/filetests/runtests/i128-bint.clif @@ -1,12 +1,12 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst -function %bint_b8_i128() -> i64, i64 
{ +function %bint_b8_i128() -> i128 { block0: v0 = bconst.b8 true v1 = bint.i128 v0 - v2, v3 = isplit.i128 v1 - return v2, v3 + return v1 } -; run: %bint_b8_i128() == [1, 0] +; run: %bint_b8_i128() == 1 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif b/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif deleted file mode 100644 index 14c82ceec4..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif +++ /dev/null @@ -1,24 +0,0 @@ -test run -target aarch64 - -; TODO: Move this test into i128-bitops-count.clif when x86_64 supports it -function %cls_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = cls v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 -} -; run: %cls_i128(0x00000000_00000000, 0x00000000_00000000) == 127 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 63 -; run: %cls_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 127 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == 0 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x3FFFFFFF_FFFFFFFF) == 1 -; run: %cls_i128(0x7FFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0x80000000_00000000, 0xC0000000_00000000) == 1 -; run: %cls_i128(0x00000000_00000000, 0xC0000000_00000000) == 1 -; run: %cls_i128(0x80000000_00000000, 0x80000000_00000000) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif index 6c0f23dd1f..627dbf3e91 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif @@ -1,63 +1,48 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x target x86_64 machinst -function %ctz_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = ctz v2 - - v4, v5 = isplit 
v3 - v6 = iadd v4, v5 - return v6 +function %ctz_i128(i128) -> i128 { +block0(v0: i128): + v1 = ctz v0 + return v1 } -; run: %ctz_i128(0x00000000_00000000, 0x00000000_00000000) == 128 -; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 0 -; run: %ctz_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 64 -; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %ctz_i128(0xFFFFFFFF_00000000, 0xF0000000_00000000) == 32 -; run: %ctz_i128(0xF0000000_00000000, 0xFF000000_00000000) == 60 -; run: %ctz_i128(0x00000001_00000000, 0x00000000_00000000) == 32 -; run: %ctz_i128(0x00000000_00000000, 0x00000001_00000000) == 96 -; run: %ctz_i128(0x00000000_00010000, 0x00000001_00000000) == 16 -; run: %ctz_i128(0x00000000_00010000, 0x00000000_00000000) == 16 +; run: %ctz_i128(0x00000000_00000000_00000000_00000000) == 128 +; run: %ctz_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0 +; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 64 +; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %ctz_i128(0xF0000000_00000000_FFFFFFFF_00000000) == 32 +; run: %ctz_i128(0xFF000000_00000000_F0000000_00000000) == 60 +; run: %ctz_i128(0x00000000_00000000_00000001_00000000) == 32 +; run: %ctz_i128(0x00000001_00000000_00000000_00000000) == 96 +; run: %ctz_i128(0x00000001_00000000_00000000_00010000) == 16 +; run: %ctz_i128(0x00000000_00000000_00000000_00010000) == 16 -function %clz_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = clz v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 +function %clz_i128(i128) -> i128 { +block0(v0: i128): + v1 = clz v0 + return v1 } -; run: %clz_i128(0x00000000_00000000, 0x00000000_00000000) == 128 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 64 -; run: %clz_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0x40000000_00000000) == 1 -; run: 
%clz_i128(0xFFFFFFFF_FFFFFFFF, 0x20000000_00000000) == 2 -; run: %clz_i128(0x00000000_00000000, 0x00000000_80000000) == 32 -; run: %clz_i128(0x00000000_00000000, 0x00000001_00000000) == 31 -; run: %clz_i128(0x00000000_00010000, 0x00000001_00000000) == 31 -; run: %clz_i128(0x00000000_00010000, 0x00000000_00000000) == 111 +; run: %clz_i128(0x00000000_00000000_00000000_00000000) == 128 +; run: %clz_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 64 +; run: %clz_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0 +; run: %clz_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %clz_i128(0x40000000_00000000_FFFFFFFF_FFFFFFFF) == 1 +; run: %clz_i128(0x20000000_00000000_FFFFFFFF_FFFFFFFF) == 2 +; run: %clz_i128(0x00000000_80000000_00000000_00000000) == 32 +; run: %clz_i128(0x00000001_00000000_00000000_00000000) == 31 +; run: %clz_i128(0x00000001_00000000_00000000_00010000) == 31 +; run: %clz_i128(0x00000000_00000000_00000000_00010000) == 111 -function %popcnt_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = popcnt v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 +function %popcnt_i128(i128) -> i128 { +block0(v0: i128): + v1 = popcnt v0 + return v1 } -; run: %popcnt_i128(0x00000000_00000000, 0x00000000_00000000) == 0 -; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 64 -; run: %popcnt_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 64 -; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 128 -; run: %popcnt_i128(0x55555555_55555555, 0x55555555_55555555) == 64 -; run: %popcnt_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 96 +; run: %popcnt_i128(0x00000000_00000000_00000000_00000000) == 0 +; run: %popcnt_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 64 +; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 64 +; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 128 +; run: %popcnt_i128(0x55555555_55555555_55555555_55555555) == 64 +; run: 
%popcnt_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 96 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops.clif b/cranelift/filetests/filetests/runtests/i128-bitops.clif index 1723186e6b..4a86f43fe5 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops.clif @@ -1,153 +1,54 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -; target x86_64 TODO: Not yet implemented on x86_64 +target x86_64 machinst -; i128 tests -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser -function %bnot_i128(i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64): -v2 = iconcat v0, v1 - -v3 = bnot v2 - -v4, v5 = isplit v3 -return v4, v5 +function %bnot_i128(i128) -> i128 { +block0(v0: i128): + v1 = bnot v0 + return v1 } -; run: %bnot_i128(0, 0) == [-1, -1] -; run: %bnot_i128(-1, -1) == [0, 0] -; run: %bnot_i128(-1, 0) == [0, -1] - -; run: %bnot_i128(0x3F001111_3F001111, 0x21350000_21350000) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] +; run: %bnot_i128(0) == -1 +; run: %bnot_i128(-1) == 0 +; run: %bnot_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bnot_i128(0x3F001111_3F001111_21350000_21350000) == 0xC0FFEEEE_C0FFEEEE_DECAFFFF_DECAFFFF -function %band_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 -v6 = band v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %band_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band v0, v1 + return v2 } -; run: %band_i128(0, 0, 0, 0) == [0, 0] -; run: %band_i128(-1, -1, 0, 0) == [0, 0] -; run: %band_i128(-1, -1, -1, -1) == [-1, -1] -; run: %band_i128(-1, -1, 0, -1) == [0, -1] - -; run: %band_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0] -; run: %band_i128(0xF1FFFEFE_F1FFFEFE, 
0xFEEEFFFF_FEEEFFFF, 0xCEFFEFEF_CEFFEFEF, 0xDFDBFFFF_DFDBFFFF) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] +; run: %band_i128(0, 0) == 0 +; run: %band_i128(-1, 0) == 0 +; run: %band_i128(-1, -1) == -1 +; run: %band_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %band_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0 +; run: %band_i128(0xFEEEFFFF_FEEEFFFF_F1FFFEFE_F1FFFEFE, 0xDFDBFFFF_DFDBFFFF_CEFFEFEF_CEFFEFEF) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE -function %bor_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bor v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %bor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bor v0, v1 + return v2 } -; run: %bor_i128(0, 0, 0, 0) == [0, 0] -; run: %bor_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bor_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bor_i128(0, 0, 0, -1) == [0, -1] - -; run: %bor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1] -; run: %bor_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0x40554444_40554444, 0x54405555_54405555) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] +; run: %bor_i128(0, 0) == 0 +; run: %bor_i128(-1, 0) == -1 +; run: %bor_i128(-1, -1) == -1 +; run: %bor_i128(0, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bor_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == -1 +; run: %bor_i128(0x8A8AAAAA_8A8AAAAA_80AAAAAA_80AAAAAA, 0x54405555_54405555_40554444_40554444) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE -function %bxor_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bxor v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %bxor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor v0, v1 + 
return v2 } -; run: %bxor_i128(0, 0, 0, 0) == [0, 0] -; run: %bxor_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bxor_i128(-1, -1, -1, -1) == [0, 0] -; run: %bxor_i128(-1, -1, 0, -1) == [-1, 0] - -; run: %bxor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1] -; run: %bxor_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0x4F5AE48A_4F5AE48A, 0x4A8A5F82_4A8A5F82) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %band_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = band_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %band_not_i128(0, 0, 0, 0) == [0, 0] -; run: %band_not_i128(-1, -1, 0, 0) == [-1, -1] -; run: %band_not_i128(-1, -1, -1, -1) == [0, 0] -; run: %band_not_i128(-1, -1, 0, -1) == [-1, 0] - -; run: %band_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210] -; run: %band_not_i128(0xF1FFFEFE_F1FFFEFE, 0xFEEEFFFF_FEEEFFFF, 0x31001010_31001010, 0x20240000_20240000) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bor_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bor_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %bor_not_i128(0, 0, 0, 0) == [-1, -1] -; run: %bor_not_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bor_not_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bor_not_i128(-1, 0, 0, -1) == [-1, 0] - -; run: %bor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210] -; run: %bor_not_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0xBFAABBBB_BFAABBBB, 0xABBFAAAA_ABBFAAAA) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bxor_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 
-v5 = iconcat v2, v3 - -v6 = bxor_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %bxor_not_i128(0, 0, 0, 0) == [-1, -1] -; run: %bxor_not_i128(-1, -1, 0, 0) == [0, 0] -; run: %bxor_not_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bxor_not_i128(-1, -1, 0, -1) == [0, -1] - -; run: %bxor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0] -; run: %bxor_not_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0xB0A51B75_B0A51B75, 0xB575A07D_B575A07D) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bitrev_i128(i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = bitrev v2 - - v4, v5 = isplit v3 - return v4, v5 -} -; run: %bitrev_i128(0, 0) == [0, 0] -; run: %bitrev_i128(-1, -1) == [-1, -1] -; run: %bitrev_i128(-1, 0) == [0, -1] -; run: %bitrev_i128(0, -1) == [-1, 0] -; run: %bitrev_i128(0x00000000_00000000, 0x80000000_00000000) == [1, 0] -; run: %bitrev_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x084C2A6E_195D3B7F, 0xF7B3D591_E6A2C480] -; run: %bitrev_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == [0x7777FF03_FFFF537B, 0xFFFF537B_7777FF03] +; run: %bxor_i128(0, 0) == 0 +; run: %bxor_i128(-1, 0) == -1 +; run: %bxor_i128(-1, -1) == 0 +; run: %bxor_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bxor_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == -1 +; run: %bxor_i128(0x9440A07D_9440A07D_8FA50A64_8FA50A64, 0x4A8A5F82_4A8A5F82_4F5AE48A_4F5AE48A) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index e494b32597..5e30ad90fd 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -1,4 +1,5 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst target x86_64 
legacy @@ -46,3 +47,17 @@ block0: return v7 } ; run + + +function %bitrev_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} +; run: %bitrev_i128(0) == 0 +; run: %bitrev_i128(-1) == -1 +; run: %bitrev_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bitrev_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %bitrev_i128(0x80000000_00000000_00000000_00000000) == 1 +; run: %bitrev_i128(0xFEDCBA98_76543210_01234567_89ABCDEF) == 0xF7B3D591_E6A2C480_084C2A6E_195D3B7F +; run: %bitrev_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFFFF537B_7777FF03_7777FF03_FFFF537B diff --git a/cranelift/filetests/filetests/runtests/i128-bornot.clif b/cranelift/filetests/filetests/runtests/i128-bornot.clif new file mode 100644 index 0000000000..e86f71915d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bornot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %bor_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bor_not v0, v1 + return v2 +} +; run: %bor_not_i128(0, 0) == -1 +; run: %bor_not_i128(-1, 0) == -1 +; run: %bor_not_i128(-1, -1) == -1 +; run: %bor_not_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bor_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0xFEDCBA98_76543210_01234567_89ABCDEF +; run: %bor_not_i128(0x8A8AAAAA_8A8AAAAA_80AAAAAA_80AAAAAA, 0xABBFAAAA_ABBFAAAA_BFAABBBB_BFAABBBB) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index 74b4f6fa8f..d755a5bd06 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -1,45 +1,43 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet 
implemented on s390x target x86_64 machinst target x86_64 legacy -function %i128_brz(i64, i64) -> b1 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - brz v2, block2 +function %i128_brz(i128) -> b1 { +block0(v0: i128): + brz v0, block2 jump block1 block1: - v3 = bconst.b1 false - return v3 + v1 = bconst.b1 false + return v1 block2: - v4 = bconst.b1 true - return v4 + v2 = bconst.b1 true + return v2 } -; run: %i128_brz(0, 0) == true -; run: %i128_brz(-1, 0) == false -; run: %i128_brz(0, -1) == false -; run: %i128_brz(-1, -1) == false +; run: %i128_brz(0) == true +; run: %i128_brz(-1) == false +; run: %i128_brz(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == false +; run: %i128_brz(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == false -function %i128_brnz(i64, i64) -> b1 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - brnz v2, block2 +function %i128_brnz(i128) -> b1 { +block0(v0: i128): + brnz v0, block2 jump block1 block1: - v3 = bconst.b1 false - return v3 + v1 = bconst.b1 false + return v1 block2: - v4 = bconst.b1 true - return v4 + v2 = bconst.b1 true + return v2 } -; run: %i128_brnz(0, 0) == false -; run: %i128_brnz(-1, 0) == true -; run: %i128_brnz(0, -1) == true -; run: %i128_brnz(-1, -1) == true +; run: %i128_brnz(0) == false +; run: %i128_brnz(-1) == true +; run: %i128_brnz(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_brnz(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true diff --git a/cranelift/filetests/filetests/runtests/i128-bricmp.clif b/cranelift/filetests/filetests/runtests/i128-bricmp.clif index 0f5f96536b..29f340fbdb 100644 --- a/cranelift/filetests/filetests/runtests/i128-bricmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-bricmp.clif @@ -1,321 +1,285 @@ test run target aarch64 -function %i128_bricmp_eq(i64, i64, i64, i64) -> b1 { -block0(v0: i64, v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 eq v4, v5, block2 +function %i128_bricmp_eq(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + 
br_icmp.i128 eq v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_eq(0, 0, 0, 0) == true -; run: %i128_bricmp_eq(-1, -1, -1, -1) == true -; run: %i128_bricmp_eq(-1, -1, 0, 0) == false -; run: %i128_bricmp_eq(-1, -1, 0, -1) == false -; run: %i128_bricmp_eq(-1, 0, -1, -1) == false -; run: %i128_bricmp_eq(0, -1, -1, -1) == false -; run: %i128_bricmp_eq(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true -; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == false -; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == false +; run: %i128_bricmp_eq(0, 0) == true +; run: %i128_bricmp_eq(-1, -1) == true +; run: %i128_bricmp_eq(-1, 0) == false +; run: %i128_bricmp_eq(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == false +; run: %i128_bricmp_eq(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == false +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == false +; run: %i128_bricmp_eq(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false +; run: %i128_bricmp_eq(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false -function %i128_bricmp_ne(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ne v4, v5, block2 +function %i128_bricmp_ne(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ne v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ne(0, 0, 0, 0) == 
false -; run: %i128_bricmp_ne(-1, -1, -1, -1) == false -; run: %i128_bricmp_ne(-1, -1, 0, 0) == true -; run: %i128_bricmp_ne(-1, -1, 0, -1) == true -; run: %i128_bricmp_ne(-1, 0, -1, -1) == true -; run: %i128_bricmp_ne(0, -1, -1, -1) == true -; run: %i128_bricmp_ne(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == false -; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == true -; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == true +; run: %i128_bricmp_ne(0, 0) == false +; run: %i128_bricmp_ne(-1, -1) == false +; run: %i128_bricmp_ne(-1, 0) == true +; run: %i128_bricmp_ne(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_bricmp_ne(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == true +; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == true +; run: %i128_bricmp_ne(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == false +; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true +; run: %i128_bricmp_ne(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true -function %i128_bricmp_slt(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 slt v4, v5, block2 +function %i128_bricmp_slt(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 slt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_slt(0, 0, 0, 0) == false -; run: %i128_bricmp_slt(1, 0, 1, 0) == false -; run: %i128_bricmp_slt(0, 0, 1, 0) == true -; run: %i128_bricmp_slt(-1, -1, 0, 0) == true -; run: %i128_bricmp_slt(0, 0, -1, -1) == false -; run: %i128_bricmp_slt(-1, -1, -1, -1) == 
false -; run: %i128_bricmp_slt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_slt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_slt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_slt(0, 0) == false +; run: %i128_bricmp_slt(1, 1) == false +; run: %i128_bricmp_slt(0, 1) == true +; run: %i128_bricmp_slt(-1, 0) == true +; run: %i128_bricmp_slt(0, -1) == false +; run: %i128_bricmp_slt(-1, -1) == false +; run: %i128_bricmp_slt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_slt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_slt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_ult(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ult v4, v5, block2 +function %i128_bricmp_ult(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ult v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ult(0, 0, 0, 0) == false -; run: %i128_bricmp_ult(1, 0, 1, 0) == false -; run: %i128_bricmp_ult(0, 0, 1, 0) == true -; run: %i128_bricmp_ult(-1, -1, 0, 0) == false -; run: %i128_bricmp_ult(0, 0, -1, -1) == true -; run: %i128_bricmp_ult(-1, -1, -1, -1) == false -; run: %i128_bricmp_ult(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_ult(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_ult(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 
0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_ult(0, 0) == false +; run: %i128_bricmp_ult(1, 1) == false +; run: %i128_bricmp_ult(0, 1) == true +; run: %i128_bricmp_ult(-1, 0) == false +; run: %i128_bricmp_ult(0, -1) == true +; run: %i128_bricmp_ult(-1, -1) == false +; run: %i128_bricmp_ult(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ult(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_ult(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_sle(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sle v4, v5, block2 +function %i128_bricmp_sle(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sle v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sle(0, 0, 0, 0) == true -; run: %i128_bricmp_sle(1, 0, 1, 0) == true -; run: %i128_bricmp_sle(0, 0, 1, 0) == true -; run: %i128_bricmp_sle(-1, -1, 0, 0) == true -; run: %i128_bricmp_sle(0, 0, -1, -1) == false -; run: %i128_bricmp_sle(-1, -1, -1, -1) == true -; run: %i128_bricmp_sle(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_sle(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_sle(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_sle(0, 0) == true +; run: %i128_bricmp_sle(1, 1) == true +; run: %i128_bricmp_sle(0, 1) == true +; run: %i128_bricmp_sle(-1, 0) == true +; run: %i128_bricmp_sle(0, -1) == false +; run: %i128_bricmp_sle(-1, -1) == true +; run: %i128_bricmp_sle(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 
0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_sle(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_sle(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_ule(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ule v4, v5, block2 +function %i128_bricmp_ule(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ule v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ule(0, 0, 0, 0) == true -; run: %i128_bricmp_ule(1, 0, 1, 0) == true -; run: %i128_bricmp_ule(0, 0, 1, 0) == true -; run: %i128_bricmp_ule(-1, -1, 0, 0) == false -; run: %i128_bricmp_ule(0, 0, -1, -1) == true -; run: %i128_bricmp_ule(-1, -1, -1, -1) == true -; run: %i128_bricmp_ule(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_ule(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_ule(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_ule(0, 0) == true +; run: %i128_bricmp_ule(1, 1) == true +; run: %i128_bricmp_ule(0, 1) == true +; run: %i128_bricmp_ule(-1, 0) == false +; run: %i128_bricmp_ule(0, -1) == true +; run: %i128_bricmp_ule(-1, -1) == true +; run: %i128_bricmp_ule(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ule(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_ule(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_sgt(i64, i64, i64, i64) -> b1 { -block0(v0: 
i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sgt v4, v5, block2 +function %i128_bricmp_sgt(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sgt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sgt(0, 0, 0, 0) == false -; run: %i128_bricmp_sgt(1, 0, 1, 0) == false -; run: %i128_bricmp_sgt(0, 0, 1, 0) == false -; run: %i128_bricmp_sgt(-1, -1, 0, 0) == false -; run: %i128_bricmp_sgt(0, 0, -1, -1) == true -; run: %i128_bricmp_sgt(-1, -1, -1, -1) == false -; run: %i128_bricmp_sgt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_sgt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_sgt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_sgt(0, 0) == false +; run: %i128_bricmp_sgt(1, 1) == false +; run: %i128_bricmp_sgt(0, 1) == false +; run: %i128_bricmp_sgt(-1, 0) == false +; run: %i128_bricmp_sgt(0, -1) == true +; run: %i128_bricmp_sgt(-1, -1) == false +; run: %i128_bricmp_sgt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sgt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_sgt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_ugt(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ugt v4, v5, block2 +function %i128_bricmp_ugt(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ugt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = 
bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ugt(0, 0, 0, 0) == false -; run: %i128_bricmp_ugt(1, 0, 1, 0) == false -; run: %i128_bricmp_ugt(0, 0, 1, 0) == false -; run: %i128_bricmp_ugt(-1, -1, 0, 0) == true -; run: %i128_bricmp_ugt(0, 0, -1, -1) == false -; run: %i128_bricmp_ugt(-1, -1, -1, -1) == false -; run: %i128_bricmp_ugt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_ugt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_ugt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_ugt(0, 0) == false +; run: %i128_bricmp_ugt(1, 1) == false +; run: %i128_bricmp_ugt(0, 1) == false +; run: %i128_bricmp_ugt(-1, 0) == true +; run: %i128_bricmp_ugt(0, -1) == false +; run: %i128_bricmp_ugt(-1, -1) == false +; run: %i128_bricmp_ugt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_ugt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_ugt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_sge(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sge v4, v5, block2 +function %i128_bricmp_sge(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sge v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sge(0, 0, 0, 0) == true -; run: %i128_bricmp_sge(1, 0, 1, 0) == true -; run: %i128_bricmp_sge(0, 0, 1, 0) == false -; run: %i128_bricmp_sge(-1, -1, 0, 0) == false -; run: %i128_bricmp_sge(0, 0, -1, -1) == true -; run: %i128_bricmp_sge(-1, -1, -1, 
-1) == true -; run: %i128_bricmp_sge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_sge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_sge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_sge(0, 0) == true +; run: %i128_bricmp_sge(1, 1) == true +; run: %i128_bricmp_sge(0, 1) == false +; run: %i128_bricmp_sge(-1, 0) == false +; run: %i128_bricmp_sge(0, -1) == true +; run: %i128_bricmp_sge(-1, -1) == true +; run: %i128_bricmp_sge(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sge(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_sge(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_uge(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 uge v4, v5, block2 +function %i128_bricmp_uge(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 uge v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_uge(0, 0, 0, 0) == true -; run: %i128_bricmp_uge(1, 0, 1, 0) == true -; run: %i128_bricmp_uge(0, 0, 1, 0) == false -; run: %i128_bricmp_uge(-1, -1, 0, 0) == true -; run: %i128_bricmp_uge(0, 0, -1, -1) == false -; run: %i128_bricmp_uge(-1, -1, -1, -1) == true -; run: %i128_bricmp_uge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_uge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_uge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 
0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_uge(0, 0) == true +; run: %i128_bricmp_uge(1, 1) == true +; run: %i128_bricmp_uge(0, 1) == false +; run: %i128_bricmp_uge(-1, 0) == true +; run: %i128_bricmp_uge(0, -1) == false +; run: %i128_bricmp_uge(-1, -1) == true +; run: %i128_bricmp_uge(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_uge(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_uge(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_of(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 of v4, v5, block2 +function %i128_bricmp_of(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 of v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_of(0, 0, 0, 0) == false -; run: %i128_bricmp_of(0, 0, 1, 0) == false -; run: %i128_bricmp_of(0, 0, -1, -1) == false -; run: %i128_bricmp_of(-1, -1, -1, -1) == false -; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0, 0) == false -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == false -; run: %i128_bricmp_of(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == true -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == false -; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == false -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 
0x30000000_00000000) == true +; run: %i128_bricmp_of(0, 0) == false +; run: %i128_bricmp_of(0, 1) == false +; run: %i128_bricmp_of(0, -1) == false +; run: %i128_bricmp_of(-1, -1) == false +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false +; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true -function %i128_bricmp_nof(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 nof v4, v5, block2 +function %i128_bricmp_nof(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 nof v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_nof(0, 0, 0, 0) == true -; run: %i128_bricmp_nof(0, 0, 1, 0) == true -; run: %i128_bricmp_nof(0, 0, -1, -1) == true -; run: %i128_bricmp_nof(-1, -1, -1, -1) == true -; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0, 0) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == true -; run: %i128_bricmp_nof(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == false -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 
0x80000000_00000000) == true -; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == false +; run: %i128_bricmp_nof(0, 0) == true +; run: %i128_bricmp_nof(0, 1) == true +; run: %i128_bricmp_nof(0, -1) == true +; run: %i128_bricmp_nof(-1, -1) == true +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true +; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false diff --git a/cranelift/filetests/filetests/runtests/i128-bxornot.clif b/cranelift/filetests/filetests/runtests/i128-bxornot.clif new file mode 100644 index 0000000000..ecacc84387 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bxornot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %bxor_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor_not v0, v1 + return v2 +} +; run: %bxor_not_i128(0, 0) == -1 +; run: %bxor_not_i128(-1, 0) == 0 +; run: %bxor_not_i128(-1, -1) == -1 +; run: %bxor_not_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %bxor_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 
0x01234567_89ABCDEF_FEDCBA98_76543210) == 0 +; run: %bxor_not_i128(0x9440A07D_9440A07D_8FA50A64_8FA50A64, 0xB575A07D_B575A07D_B0A51B75_B0A51B75) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-cls.clif b/cranelift/filetests/filetests/runtests/i128-cls.clif new file mode 100644 index 0000000000..212d2e5135 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-cls.clif @@ -0,0 +1,18 @@ +test run +target aarch64 + +function %cls_i128(i128) -> i128 { +block0(v0: i128): + v1 = cls v0 + return v1 +} +; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127 +; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127 +; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1 +; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1 +; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-const.clif b/cranelift/filetests/filetests/runtests/i128-const.clif index 604015a99f..65975c680c 100644 --- a/cranelift/filetests/filetests/runtests/i128-const.clif +++ b/cranelift/filetests/filetests/runtests/i128-const.clif @@ -1,13 +1,13 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 ; target s390x TODO: Not yet implemented on s390x target x86_64 machinst -function %i128_const_0() -> i64, i64 { +function %i128_const_0() -> i128 { block0: v1 = iconst.i128 0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_const_0() == [0, 0] +; run: %i128_const_0() == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-extend-2.clif b/cranelift/filetests/filetests/runtests/i128-extend-2.clif index 
906d699bbc..0d5ce42a58 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend-2.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend-2.clif @@ -1,69 +1,64 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst ; TODO: Merge this file with i128-extend once the x86 legacy backend is removed -function %i128_uextend_i32(i32) -> i64, i64 { +function %i128_uextend_i32(i32) -> i128 { block0(v0: i32): v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_uextend_i32(0) == [0, 0] -; run: %i128_uextend_i32(-1) == [0xffff_ffff, 0] -; run: %i128_uextend_i32(0xffff_eeee) == [0xffff_eeee, 0] +; run: %i128_uextend_i32(0) == 0 +; run: %i128_uextend_i32(-1) == 0x00000000_00000000_00000000_ffffffff +; run: %i128_uextend_i32(0xffff_eeee) == 0x00000000_00000000_00000000_ffffeeee -function %i128_sextend_i32(i32) -> i64, i64 { +function %i128_sextend_i32(i32) -> i128 { block0(v0: i32): v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_sextend_i32(0) == [0, 0] -; run: %i128_sextend_i32(-1) == [-1, -1] -; run: %i128_sextend_i32(0x7fff_ffff) == [0x7fff_ffff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i32(0xffff_eeee) == [0xffff_ffff_ffff_eeee, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i32(0) == 0 +; run: %i128_sextend_i32(-1) == -1 +; run: %i128_sextend_i32(0x7fff_ffff) == 0x00000000_00000000_00000000_7fffffff +; run: %i128_sextend_i32(0xffff_eeee) == 0xffffffff_ffffffff_ffffffff_ffff_eeee -function %i128_uextend_i16(i16) -> i64, i64 { +function %i128_uextend_i16(i16) -> i128 { block0(v0: i16): v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_uextend_i16(0) == [0, 0] -; run: %i128_uextend_i16(-1) == [0xffff, 0] -; run: %i128_uextend_i16(0xffee) == [0xffee, 0] +; run: %i128_uextend_i16(0) == 0 +; run: %i128_uextend_i16(-1) == 0x00000000_00000000_00000000_0000ffff +; run: %i128_uextend_i16(0xffee) == 
0x00000000_00000000_00000000_0000ffee -function %i128_sextend_i16(i16) -> i64, i64 { +function %i128_sextend_i16(i16) -> i128 { block0(v0: i16): v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_sextend_i16(0) == [0, 0] -; run: %i128_sextend_i16(-1) == [-1, -1] -; run: %i128_sextend_i16(0x7fff) == [0x7fff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i16(0xffee) == [0xffff_ffff_ffff_ffee, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i16(0) == 0 +; run: %i128_sextend_i16(-1) == -1 +; run: %i128_sextend_i16(0x7fff) == 0x00000000_00000000_00000000_00007fff +; run: %i128_sextend_i16(0xffee) == 0xffffffff_ffffffff_ffffffff_ffffffee -function %i128_uextend_i8(i8) -> i64, i64 { +function %i128_uextend_i8(i8) -> i128 { block0(v0: i8): v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_uextend_i8(0) == [0, 0] -; run: %i128_uextend_i8(-1) == [0xff, 0] -; run: %i128_uextend_i8(0xfe) == [0xfe, 0] +; run: %i128_uextend_i8(0) == 0 +; run: %i128_uextend_i8(-1) == 0x00000000_00000000_00000000_000000ff +; run: %i128_uextend_i8(0xfe) == 0x00000000_00000000_00000000_000000fe -function %i128_sextend_i8(i8) -> i64, i64 { +function %i128_sextend_i8(i8) -> i128 { block0(v0: i8): v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_sextend_i8(0) == [0, 0] -; run: %i128_sextend_i8(-1) == [-1, -1] -; run: %i128_sextend_i8(0x7f) == [0x7f, 0x0000_0000_0000_0000] -; run: %i128_sextend_i8(0xfe) == [0xffff_ffff_ffff_fffe, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i8(0) == 0 +; run: %i128_sextend_i8(-1) == -1 +; run: %i128_sextend_i8(0x7f) == 0x00000000_00000000_00000000_0000007f +; run: %i128_sextend_i8(0xfe) == 0xffffffff_ffffffff_ffffffff_fffffffe diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 57263be68e..1789299479 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ 
b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -1,26 +1,24 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x target x86_64 machinst target x86_64 legacy -function %i128_uextend_i64(i64) -> i64, i64 { +function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_uextend_i64(0) == [0, 0] -; run: %i128_uextend_i64(-1) == [-1, 0] -; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0] +; run: %i128_uextend_i64(0) == 0 +; run: %i128_uextend_i64(-1) == 0x00000000_00000000_ffffffff_ffffffff +; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == 0x00000000_00000000_ffff_ffff_eeee_0000 -function %i128_sextend_i64(i64) -> i64, i64 { +function %i128_sextend_i64(i64) -> i128 { block0(v0: i64): v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_sextend_i64(0) == [0, 0] -; run: %i128_sextend_i64(-1) == [-1, -1] -; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == [0x7fff_ffff_ffff_ffff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i64(0) == 0 +; run: %i128_sextend_i64(-1) == -1 +; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == 0x00000000_00000000_7fffffffffffffff +; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == 0xffffffff_ffffffff_ffffffff_eeee0000 diff --git a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif index e91b21de50..abbcd7d2f8 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif @@ -1,46 +1,38 @@ test run target aarch64 -; target x86_64 machinst TODO: X86_64 does not implement i128 icmp overflow -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's 
parser -function %icmp_of_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 of v4, v5 - return v6 +function %icmp_of_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 of v0, v1 + return v2 } -; run: %icmp_of_i128(0, 0, 0, 0) == false -; run: %icmp_of_i128(0, 0, 1, 0) == false -; run: %icmp_of_i128(0, 0, -1, -1) == false -; run: %icmp_of_i128(-1, -1, -1, -1) == false -; run: %icmp_of_i128(0x00000000_00000000, 0x80000000_00000000, 0, 0) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == false -; run: %icmp_of_i128(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == true -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == false -; run: %icmp_of_i128(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == true +; run: %icmp_of_i128(0, 0) == false +; run: %icmp_of_i128(0, 1) == false +; run: %icmp_of_i128(0, -1) == false +; run: %icmp_of_i128(-1, -1) == false +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false +; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 
0x30000000_00000000_00000000_00000000) == false +; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true -function %icmp_nof_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 nof v4, v5 - return v6 +function %icmp_nof_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 nof v0, v1 + return v2 } -; run: %icmp_nof_i128(0, 0, 0, 0) == true -; run: %icmp_nof_i128(0, 0, 1, 0) == true -; run: %icmp_nof_i128(0, 0, -1, -1) == true -; run: %icmp_nof_i128(-1, -1, -1, -1) == true -; run: %icmp_nof_i128(0x00000000_00000000, 0x80000000_00000000, 0, 0) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == true -; run: %icmp_nof_i128(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == false -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == true -; run: %icmp_nof_i128(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == false +; run: %icmp_nof_i128(0, 0) == true +; run: %icmp_nof_i128(0, 1) == true +; run: %icmp_nof_i128(0, -1) == true +; run: %icmp_nof_i128(-1, -1) == true +; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true +; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: 
%icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true +; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false diff --git a/cranelift/filetests/filetests/runtests/i128-icmp.clif b/cranelift/filetests/filetests/runtests/i128-icmp.clif index e29000b931..5758d5ef35 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp.clif @@ -1,189 +1,168 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser -function %icmp_eq_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 eq v4, v5 - return v6 +function %icmp_eq_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %icmp_eq_i128(0, 0, 0, 0) == true -; run: %icmp_eq_i128(-1, -1, -1, -1) == true -; run: %icmp_eq_i128(-1, -1, 0, 0) == false -; run: %icmp_eq_i128(-1, -1, 0, -1) == false -; run: %icmp_eq_i128(-1, 0, -1, -1) == false -; run: %icmp_eq_i128(0, -1, -1, -1) == false -; run: %icmp_eq_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true -; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == false -; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == false +; run: %icmp_eq_i128(0, 0) == true +; run: %icmp_eq_i128(-1, -1) == true +; run: %icmp_eq_i128(-1, 0) == false +; run: %icmp_eq_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_eq_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == false +; run: 
%icmp_eq_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == false +; run: %icmp_eq_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true +; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false +; run: %icmp_eq_i128(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false -function %icmp_ne_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ne v4, v5 - return v6 +function %icmp_ne_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ne v0, v1 + return v2 } -; run: %icmp_ne_i128(0, 0, 0, 0) == false -; run: %icmp_ne_i128(-1, -1, -1, -1) == false -; run: %icmp_ne_i128(-1, -1, 0, 0) == true -; run: %icmp_ne_i128(-1, -1, 0, -1) == true -; run: %icmp_ne_i128(-1, 0, -1, -1) == true -; run: %icmp_ne_i128(0, -1, -1, -1) == true -; run: %icmp_ne_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == false -; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == true -; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == true +; run: %icmp_ne_i128(0, 0) == false +; run: %icmp_ne_i128(-1, -1) == false +; run: %icmp_ne_i128(-1, 0) == true +; run: %icmp_ne_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ne_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == true +; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == true +; run: %icmp_ne_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == false +; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true +; run: %icmp_ne_i128(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true -function %icmp_slt_i128(i64, i64, i64, i64) -> 
b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 slt v4, v5 - return v6 +function %icmp_slt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 slt v0, v1 + return v2 } -; run: %icmp_slt_i128(0, 0, 0, 0) == false -; run: %icmp_slt_i128(1, 0, 1, 0) == false -; run: %icmp_slt_i128(0, 0, 1, 0) == true -; run: %icmp_slt_i128(-1, -1, 0, 0) == true -; run: %icmp_slt_i128(0, 0, -1, -1) == false -; run: %icmp_slt_i128(-1, -1, -1, -1) == false -; run: %icmp_slt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_slt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_slt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_slt_i128(0, 0) == false +; run: %icmp_slt_i128(1, 1) == false +; run: %icmp_slt_i128(0, 1) == true +; run: %icmp_slt_i128(-1, 0) == true +; run: %icmp_slt_i128(0, -1) == false +; run: %icmp_slt_i128(-1, -1) == false +; run: %icmp_slt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_slt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_slt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_ult_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ult v4, v5 - return v6 +function %icmp_ult_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ult v0, v1 + return v2 } -; run: %icmp_ult_i128(0, 0, 0, 0) == false -; run: %icmp_ult_i128(1, 0, 1, 0) == false -; run: %icmp_ult_i128(0, 0, 1, 0) == true -; run: %icmp_ult_i128(-1, -1, 0, 0) == false -; run: %icmp_ult_i128(0, 0, -1, -1) == true -; run: %icmp_ult_i128(-1, -1, -1, -1) == false -; run: 
%icmp_ult_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_ult_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_ult_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_ult_i128(0, 0) == false +; run: %icmp_ult_i128(1, 1) == false +; run: %icmp_ult_i128(0, 1) == true +; run: %icmp_ult_i128(-1, 0) == false +; run: %icmp_ult_i128(0, -1) == true +; run: %icmp_ult_i128(-1, -1) == false +; run: %icmp_ult_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ult_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_ult_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_sle_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sle v4, v5 - return v6 +function %icmp_sle_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sle v0, v1 + return v2 } -; run: %icmp_sle_i128(0, 0, 0, 0) == true -; run: %icmp_sle_i128(1, 0, 1, 0) == true -; run: %icmp_sle_i128(0, 0, 1, 0) == true -; run: %icmp_sle_i128(-1, -1, 0, 0) == true -; run: %icmp_sle_i128(0, 0, -1, -1) == false -; run: %icmp_sle_i128(-1, -1, -1, -1) == true -; run: %icmp_sle_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_sle_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_sle_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_sle_i128(0, 0) == true +; run: %icmp_sle_i128(1, 1) == true +; run: %icmp_sle_i128(0, 1) == true +; run: %icmp_sle_i128(-1, 0) == true +; run: %icmp_sle_i128(0, -1) == false +; run: 
%icmp_sle_i128(-1, -1) == true +; run: %icmp_sle_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_sle_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_sle_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_ule_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ule v4, v5 - return v6 +function %icmp_ule_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ule v0, v1 + return v2 } -; run: %icmp_ule_i128(0, 0, 0, 0) == true -; run: %icmp_ule_i128(1, 0, 1, 0) == true -; run: %icmp_ule_i128(0, 0, 1, 0) == true -; run: %icmp_ule_i128(-1, -1, 0, 0) == false -; run: %icmp_ule_i128(0, 0, -1, -1) == true -; run: %icmp_ule_i128(-1, -1, -1, -1) == true -; run: %icmp_ule_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_ule_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_ule_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_ule_i128(0, 0) == true +; run: %icmp_ule_i128(1, 1) == true +; run: %icmp_ule_i128(0, 1) == true +; run: %icmp_ule_i128(-1, 0) == false +; run: %icmp_ule_i128(0, -1) == true +; run: %icmp_ule_i128(-1, -1) == true +; run: %icmp_ule_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ule_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_ule_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_sgt_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sgt v4, v5 - return 
v6 +function %icmp_sgt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sgt v0, v1 + return v2 } -; run: %icmp_sgt_i128(0, 0, 0, 0) == false -; run: %icmp_sgt_i128(1, 0, 1, 0) == false -; run: %icmp_sgt_i128(0, 0, 1, 0) == false -; run: %icmp_sgt_i128(-1, -1, 0, 0) == false -; run: %icmp_sgt_i128(0, 0, -1, -1) == true -; run: %icmp_sgt_i128(-1, -1, -1, -1) == false -; run: %icmp_sgt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_sgt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_sgt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_sgt_i128(0, 0) == false +; run: %icmp_sgt_i128(1, 1) == false +; run: %icmp_sgt_i128(0, 1) == false +; run: %icmp_sgt_i128(-1, 0) == false +; run: %icmp_sgt_i128(0, -1) == true +; run: %icmp_sgt_i128(-1, -1) == false +; run: %icmp_sgt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_sgt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_sgt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_ugt_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ugt v4, v5 - return v6 +function %icmp_ugt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ugt v0, v1 + return v2 } -; run: %icmp_ugt_i128(0, 0, 0, 0) == false -; run: %icmp_ugt_i128(1, 0, 1, 0) == false -; run: %icmp_ugt_i128(0, 0, 1, 0) == false -; run: %icmp_ugt_i128(-1, -1, 0, 0) == true -; run: %icmp_ugt_i128(0, 0, -1, -1) == false -; run: %icmp_ugt_i128(-1, -1, -1, -1) == false -; run: %icmp_ugt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: 
%icmp_ugt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_ugt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_ugt_i128(0, 0) == false +; run: %icmp_ugt_i128(1, 1) == false +; run: %icmp_ugt_i128(0, 1) == false +; run: %icmp_ugt_i128(-1, 0) == true +; run: %icmp_ugt_i128(0, -1) == false +; run: %icmp_ugt_i128(-1, -1) == false +; run: %icmp_ugt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_ugt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_ugt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_sge_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sge v4, v5 - return v6 +function %icmp_sge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sge v0, v1 + return v2 } -; run: %icmp_sge_i128(0, 0, 0, 0) == true -; run: %icmp_sge_i128(1, 0, 1, 0) == true -; run: %icmp_sge_i128(0, 0, 1, 0) == false -; run: %icmp_sge_i128(-1, -1, 0, 0) == false -; run: %icmp_sge_i128(0, 0, -1, -1) == true -; run: %icmp_sge_i128(-1, -1, -1, -1) == true -; run: %icmp_sge_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_sge_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_sge_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_sge_i128(0, 0) == true +; run: %icmp_sge_i128(1, 1) == true +; run: %icmp_sge_i128(0, 1) == false +; run: %icmp_sge_i128(-1, 0) == false +; run: %icmp_sge_i128(0, -1) == true +; run: %icmp_sge_i128(-1, -1) == true +; run: %icmp_sge_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 
0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_sge_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_sge_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_uge_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 uge v4, v5 - return v6 +function %icmp_uge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 uge v0, v1 + return v2 } -; run: %icmp_uge_i128(0, 0, 0, 0) == true -; run: %icmp_uge_i128(1, 0, 1, 0) == true -; run: %icmp_uge_i128(0, 0, 1, 0) == false -; run: %icmp_uge_i128(-1, -1, 0, 0) == true -; run: %icmp_uge_i128(0, 0, -1, -1) == false -; run: %icmp_uge_i128(-1, -1, -1, -1) == true -; run: %icmp_uge_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_uge_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_uge_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_uge_i128(0, 0) == true +; run: %icmp_uge_i128(1, 1) == true +; run: %icmp_uge_i128(0, 1) == false +; run: %icmp_uge_i128(-1, 0) == true +; run: %icmp_uge_i128(0, -1) == false +; run: %icmp_uge_i128(-1, -1) == true +; run: %icmp_uge_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_uge_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_uge_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true ; Icmp Imm Tests diff --git a/cranelift/filetests/filetests/runtests/i128-load-store.clif b/cranelift/filetests/filetests/runtests/i128-load-store.clif index 41046e8717..4d4c9e11dd 100644 --- a/cranelift/filetests/filetests/runtests/i128-load-store.clif +++ 
b/cranelift/filetests/filetests/runtests/i128-load-store.clif @@ -1,136 +1,125 @@ test run +set enable_llvm_abi_extensions=true target x86_64 machinst target aarch64 -function %i128_stack_store_load(i64, i64) -> b1 { +function %i128_stack_store_load(i128) -> b1 { ss0 = explicit_slot 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load(0, 0) == true -; run: %i128_stack_store_load(-1, -1) == true -; run: %i128_stack_store_load(-1, 0) == true -; run: %i128_stack_store_load(0, -1) == true -; run: %i128_stack_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load(0) == true +; run: %i128_stack_store_load(-1) == true +; run: %i128_stack_store_load(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_stack_store_load_offset(i64, i64) -> b1 { +function %i128_stack_store_load_offset(i128) -> b1 { ss0 = explicit_slot 16, offset 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_offset(0, 0) == true -; run: %i128_stack_store_load_offset(-1, -1) == true -; run: %i128_stack_store_load_offset(-1, 0) 
== true -; run: %i128_stack_store_load_offset(0, -1) == true -; run: %i128_stack_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_offset(0) == true +; run: %i128_stack_store_load_offset(-1) == true +; run: %i128_stack_store_load_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_stack_store_load_inst_offset(i64, i64) -> b1 { +function %i128_stack_store_load_inst_offset(i128) -> b1 { ss0 = explicit_slot 16 ss1 = explicit_slot 16 ss2 = explicit_slot 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss1+16 + v1 = stack_load.i128 ss1+16 - stack_store.i128 v2, ss1+16 - v3 = stack_load.i128 ss1+16 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_inst_offset(0, 0) == true -; run: %i128_stack_store_load_inst_offset(-1, -1) == true -; run: %i128_stack_store_load_inst_offset(-1, 0) == true -; run: %i128_stack_store_load_inst_offset(0, -1) == true -; run: %i128_stack_store_load_inst_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_inst_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_inst_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_inst_offset(0) == true +; run: %i128_stack_store_load_inst_offset(-1) == true +; run: 
%i128_stack_store_load_inst_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_inst_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_inst_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_inst_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load_inst_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true ; Some arches (aarch64) try to encode the offset into the load/store instructions ; test that we spill if the offset is too large and doesn't fit in the instruction -function %i128_stack_store_load_big_offset(i64, i64) -> b1 { +function %i128_stack_store_load_big_offset(i128) -> b1 { ss0 = explicit_slot 100000 ss1 = explicit_slot 8 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_big_offset(0, 0) == true -; run: %i128_stack_store_load_big_offset(-1, -1) == true -; run: %i128_stack_store_load_big_offset(-1, 0) == true -; run: %i128_stack_store_load_big_offset(0, -1) == true -; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_big_offset(0) == true +; run: %i128_stack_store_load_big_offset(-1) == true +; run: %i128_stack_store_load_big_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_big_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_big_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_big_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true 
+; run: %i128_stack_store_load_big_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_store_load(i64, i64) -> b1 { +function %i128_store_load(i128) -> b1 { ss0 = explicit_slot 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + v1 = stack_addr.i64 ss0 + store.i128 v0, v1 + v2 = load.i128 v1 - v3 = stack_addr.i64 ss0 - store.i128 v2, v3 - v4 = load.i128 v3 - - v5 = icmp.i128 eq v2, v4 - return v5 + v3 = icmp.i128 eq v0, v2 + return v3 } -; run: %i128_store_load(0, 0) == true -; run: %i128_store_load(-1, -1) == true -; run: %i128_store_load(-1, 0) == true -; run: %i128_store_load(0, -1) == true -; run: %i128_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_store_load(0) == true +; run: %i128_store_load(-1) == true +; run: %i128_store_load(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_store_load(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_store_load(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_store_load(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_store_load(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_store_load_offset(i64, i64) -> b1 { +function %i128_store_load_offset(i128) -> b1 { ss0 = explicit_slot 32 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + v1 = stack_addr.i64 ss0 + store.i128 v0, v1+16 + v2 = load.i128 v1+16 - v3 = stack_addr.i64 ss0 - store.i128 v2, v3+16 - v4 = load.i128 v3+16 - - v5 = icmp.i128 eq v2, v4 - return v5 + v3 = icmp.i128 eq v0, v2 + return v3 } -; run: %i128_store_load_offset(0, 0) == true -; run: %i128_store_load_offset(-1, -1) == true -; run: %i128_store_load_offset(-1, 0) == true -; run: %i128_store_load_offset(0, -1) == true -; run: %i128_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: 
%i128_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_store_load_offset(0) == true +; run: %i128_store_load_offset(-1) == true +; run: %i128_store_load_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_store_load_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_store_load_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_store_load_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_store_load_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true diff --git a/cranelift/filetests/filetests/runtests/i128-reduce.clif b/cranelift/filetests/filetests/runtests/i128-reduce.clif index 08ddfad194..15baef87f1 100644 --- a/cranelift/filetests/filetests/runtests/i128-reduce.clif +++ b/cranelift/filetests/filetests/runtests/i128-reduce.clif @@ -1,43 +1,40 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst -function %ireduce_128_64(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i64 v2 - return v3 +function %ireduce_128_64(i128) -> i64 { +block0(v0: i128): + v1 = ireduce.i64 v0 + return v1 } -; run: %ireduce_128_64(0, 0) == 0 -; run: %ireduce_128_64(-1, -1) == -1 -; run: %ireduce_128_64(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xC0FFEEEE_DECAFFFF +; run: %ireduce_128_64(0) == 0 +; run: %ireduce_128_64(-1) == -1 +; run: %ireduce_128_64(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF -function %ireduce_128_32(i64, i64) -> i32 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i32 v2 - return v3 +function %ireduce_128_32(i128) -> i32 { +block0(v0: i128): + v1 = ireduce.i32 v0 + return v1 } -; run: %ireduce_128_32(0, 0) == 0 -; run: %ireduce_128_32(-1, -1) == -1 -; run: %ireduce_128_32(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xDECAFFFF +; run: %ireduce_128_32(0) == 0 +; run: 
%ireduce_128_32(-1) == -1 +; run: %ireduce_128_32(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xDECAFFFF -function %ireduce_128_16(i64, i64) -> i16 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i16 v2 - return v3 +function %ireduce_128_16(i128) -> i16 { +block0(v0: i128): + v1 = ireduce.i16 v0 + return v1 } -; run: %ireduce_128_16(0, 0) == 0 -; run: %ireduce_128_16(-1, -1) == -1 -; run: %ireduce_128_16(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xFFFF +; run: %ireduce_128_16(0) == 0 +; run: %ireduce_128_16(-1) == -1 +; run: %ireduce_128_16(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFFFF -function %ireduce_128_8(i64, i64) -> i8 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i8 v2 - return v3 +function %ireduce_128_8(i128) -> i8 { +block0(v0: i128): + v1 = ireduce.i8 v0 + return v1 } -; run: %ireduce_128_8(0, 0) == 0 -; run: %ireduce_128_8(-1, -1) == -1 -; run: %ireduce_128_8(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xFF +; run: %ireduce_128_8(0) == 0 +; run: %ireduce_128_8(-1) == -1 +; run: %ireduce_128_8(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFF diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index 359e9c1a7c..47a4ab9ea5 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -1,60 +1,51 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x target x86_64 machinst -function %rotl(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = rotl.i128 v3, v2 - v5, v6 = isplit v4 - return v5, v6 +function %rotl(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = rotl.i128 v0, v1 + return v2 } -; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 73) == 
[0x02020202_02020202, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotl(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020202 +; run: %rotl(0x01010101_01010101_01010101_01010101, 73) == 0x02020202_02020202_02020202_02020202 +; run: %rotl(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotl(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotr(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = rotr.i128 v3, v2 - v5, v6 = isplit v4 - return v5, v6 +function %rotr(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = rotr.i128 v0, v1 + return v2 } -; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotr(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotr(0x01010101_01010101_01010101_01010101, 9) == 0x80808080_80808080_80808080_80808080 +; run: %rotr(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080 +; run: %rotr(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotr(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotl_amt_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = uextend.i64 v2 - v4 = iconcat v3, v3 +function %rotl_amt_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v5 = iconcat v0, v1 - - v6 
= rotl.i128 v5, v4 - v7, v8 = isplit v6 - return v7, v8 + v4 = rotl.i128 v0, v3 + return v4 } -; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotl_amt_i128(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020202 +; run: %rotl_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x02020202_02020202_02020202_02020202 +; run: %rotl_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotl_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotr_amt_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = uextend.i64 v2 - v4 = iconcat v3, v3 - v5 = iconcat v0, v1 +function %rotr_amt_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = rotr.i128 v5, v4 - v7, v8 = isplit v6 - return v7, v8 + v4 = rotr.i128 v0, v3 + return v4 } -; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotr_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 9) == 0x80808080_80808080_80808080_80808080 +; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 
0x80808080_80808080_80808080_80808080 +; run: %rotr_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotr_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 diff --git a/cranelift/filetests/filetests/runtests/i128-select.clif b/cranelift/filetests/filetests/runtests/i128-select.clif index 0d23939673..d557d31e2f 100644 --- a/cranelift/filetests/filetests/runtests/i128-select.clif +++ b/cranelift/filetests/filetests/runtests/i128-select.clif @@ -1,21 +1,15 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst -function %i128_select(i8, i64, i64, i64, i64) -> i64, i64 { -block0(v0: i8, v1: i64, v2: i64, v3: i64, v4: i64): - v5 = icmp_imm ne v0, 0 - - v6 = iconcat v1, v2 - v7 = iconcat v3, v4 - - v8 = select.i128 v5, v6, v7 - - v9, v10 = isplit v8 - return v9, v10 +function %i128_select(b1, i128, i128) -> i128 { +block0(v0: b1, v1: i128, v2: i128): + v3 = select.i128 v0, v1, v2 + return v3 } -; run: %i128_select(1, 0, 0, 1, 1) == [0, 0] -; run: %i128_select(0, 0, 0, 1, 1) == [1, 1] +; run: %i128_select(true, 0, 1) == 0 +; run: %i128_select(false, 0, 1) == 1 -; run: %i128_select(1, 1, 2, 3, 4) == [1, 2] -; run: %i128_select(0, 1, 2, 3, 4) == [3, 4] +; run: %i128_select(true, 0x00000000_00000000_DECAFFFF_C0FFEEEE, 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF) == 0x00000000_00000000_DECAFFFF_C0FFEEEE +; run: %i128_select(false, 0x00000000_00000000_DECAFFFF_C0FFEEEE, 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF) == 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF diff --git a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif index c486092703..094315426f 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif @@ -3,88 +3,82 @@ target aarch64 ; TODO: Merge this with the main i128-shifts 
file when x86_64 passes these. -function %ishl_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i16 v0, v3 - return v4 +function %ishl_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ishl.i16 v0, v1 + return v2 } -; run: %ishl_i16_i128(0x0000, 0, 0) == 0x0000 -; run: %ishl_i16_i128(0x0000, 1, 0) == 0x0000 -; run: %ishl_i16_i128(0x000f, 0, 4) == 0x000f -; run: %ishl_i16_i128(0x000f, 4, 0) == 0x00f0 -; run: %ishl_i16_i128(0x0004, 16, 0) == 0x0004 -; run: %ishl_i16_i128(0x0004, 17, 0) == 0x0008 -; run: %ishl_i16_i128(0x0004, 18, 1) == 0x0010 +; run: %ishl_i16_i128(0x0000, 0) == 0x0000 +; run: %ishl_i16_i128(0x0000, 1) == 0x0000 +; run: %ishl_i16_i128(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i128(0x0004, 16) == 0x0004 +; run: %ishl_i16_i128(0x0004, 17) == 0x0008 +; run: %ishl_i16_i128(0x000f, 0x00000000_00000004_00000000_00000000) == 0x000f +; run: %ishl_i16_i128(0x0004, 0x00000000_00000001_00000000_00000012) == 0x0010 -function %ishl_i8_i128(i8, i64, i64) -> i8 { -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i8 v0, v3 - return v4 +function %ishl_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ishl.i8 v0, v1 + return v2 } -; run: %ishl_i8_i128(0x00, 0, 0) == 0x00 -; run: %ishl_i8_i128(0x00, 1, 0) == 0x00 -; run: %ishl_i8_i128(0x0f, 0, 4) == 0x0f -; run: %ishl_i8_i128(0x0f, 4, 0) == 0xf0 -; run: %ishl_i8_i128(0x04, 8, 0) == 0x04 -; run: %ishl_i8_i128(0x04, 9, 0) == 0x08 -; run: %ishl_i8_i128(0x04, 10, 1) == 0x10 +; run: %ishl_i8_i128(0x00, 0) == 0x00 +; run: %ishl_i8_i128(0x00, 1) == 0x00 +; run: %ishl_i8_i128(0x0f, 4) == 0xf0 +; run: %ishl_i8_i128(0x04, 8) == 0x04 +; run: %ishl_i8_i128(0x04, 9) == 0x08 +; run: %ishl_i8_i128(0x0f, 0x00000000_00000004_00000000_00000000) == 0x0f +; run: %ishl_i8_i128(0x04, 0x00000000_00000001_00000000_0000000A) == 0x10 -function %ushr_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = 
ushr.i16 v0, v3 - return v4 +function %ushr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ushr.i16 v0, v1 + return v2 } -; run: %ushr_i16_i128(0x1000, 0, 0) == 0x1000 -; run: %ushr_i16_i128(0x1000, 1, 0) == 0x0800 -; run: %ushr_i16_i128(0xf000, 0, 4) == 0xf000 -; run: %ushr_i16_i128(0xf000, 4, 0) == 0x0f00 -; run: %ushr_i16_i128(0x4000, 16, 0) == 0x4000 -; run: %ushr_i16_i128(0x4000, 17, 0) == 0x2000 -; run: %ushr_i16_i128(0x4000, 18, 1) == 0x1000 +; run: %ushr_i16_i128(0x1000, 0) == 0x1000 +; run: %ushr_i16_i128(0x1000, 1) == 0x0800 +; run: %ushr_i16_i128(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i128(0x4000, 16) == 0x4000 +; run: %ushr_i16_i128(0x4000, 17) == 0x2000 +; run: %ushr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %ushr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 -function %ushr_i8_i128(i8, i64, i64) -> i8 { -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ushr.i8 v0, v3 - return v4 +function %ushr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ushr.i8 v0, v1 + return v2 } -; run: %ushr_i8_i128(0x10, 0, 0) == 0x10 -; run: %ushr_i8_i128(0x10, 1, 0) == 0x08 -; run: %ushr_i8_i128(0xf0, 0, 4) == 0xf0 -; run: %ushr_i8_i128(0xf0, 4, 0) == 0x0f -; run: %ushr_i8_i128(0x40, 8, 0) == 0x40 -; run: %ushr_i8_i128(0x40, 9, 0) == 0x20 -; run: %ushr_i8_i128(0x40, 10, 1) == 0x10 +; run: %ushr_i8_i128(0x10, 0) == 0x10 +; run: %ushr_i8_i128(0x10, 1) == 0x08 +; run: %ushr_i8_i128(0xf0, 4) == 0x0f +; run: %ushr_i8_i128(0x40, 8) == 0x40 +; run: %ushr_i8_i128(0x40, 9) == 0x20 +; run: %ushr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %ushr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 -function %sshr_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i16 v0, v3 - return v4 +function %sshr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = sshr.i16 v0, v1 + return v2 } -; run: 
%sshr_i16_i128(0x8000, 0, 0) == 0x8000 -; run: %sshr_i16_i128(0x8000, 1, 0) == 0xC000 -; run: %sshr_i16_i128(0xf000, 0, 4) == 0xf000 -; run: %sshr_i16_i128(0xf000, 4, 0) == 0xff00 -; run: %sshr_i16_i128(0x4000, 16, 0) == 0x4000 -; run: %sshr_i16_i128(0x4000, 17, 0) == 0x2000 -; run: %sshr_i16_i128(0x4000, 18, 1) == 0x1000 +; run: %sshr_i16_i128(0x8000, 0) == 0x8000 +; run: %sshr_i16_i128(0x8000, 1) == 0xC000 +; run: %sshr_i16_i128(0xf000, 4) == 0xff00 +; run: %sshr_i16_i128(0x4000, 16) == 0x4000 +; run: %sshr_i16_i128(0x4000, 17) == 0x2000 +; run: %sshr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %sshr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 -function %sshr_i8_i128(i8, i64, i64) -> i8 { -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i8 v0, v3 - return v4 +function %sshr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = sshr.i8 v0, v1 + return v2 } -; run: %sshr_i8_i128(0x80, 0, 0) == 0x80 -; run: %sshr_i8_i128(0x80, 1, 0) == 0xC0 -; run: %sshr_i8_i128(0xf0, 0, 4) == 0xf0 -; run: %sshr_i8_i128(0xf0, 4, 0) == 0xff -; run: %sshr_i8_i128(0x40, 8, 0) == 0x40 -; run: %sshr_i8_i128(0x40, 9, 0) == 0x20 -; run: %sshr_i8_i128(0x40, 10, 1) == 0x10 +; run: %sshr_i8_i128(0x80, 0) == 0x80 +; run: %sshr_i8_i128(0x80, 1) == 0xC0 +; run: %sshr_i8_i128(0xf0, 4) == 0xff +; run: %sshr_i8_i128(0x40, 8) == 0x40 +; run: %sshr_i8_i128(0x40, 9) == 0x20 +; run: %sshr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %sshr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 1e862cb7e9..41cef85e16 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -1,271 +1,232 @@ test run +set enable_llvm_abi_extensions=true target aarch64 target x86_64 machinst -function 
%ishl_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 +function %ishl_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = ishl.i128 v3, v5 + v4 = ishl.i128 v0, v3 - v7, v8 = isplit v6 - return v7, v8 -} -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i128(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 2) == 
[0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i64(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i64(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i64(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i32(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i32(0x01010101_01010101, 
0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i32(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i32(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i16(i64, i64, i16) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i16(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i16(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i16(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: 
%ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i8(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - 
- -function %ishl_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i64 v0, v3 return v4 } -; run: %ishl_i64_i128(0x00000000_00000000, 0, 0) == 0x00000000_00000000 -; run: %ishl_i64_i128(0x00000000_00000000, 1, 0) == 0x00000000_00000000 -; run: %ishl_i64_i128(0x0000000f_0000000f, 0, 4) == 0x0000000f_0000000f -; run: %ishl_i64_i128(0x0000000f_0000000f, 4, 0) == 0x000000f0_000000f0 -; run: %ishl_i64_i128(0x00000000_00000004, 64, 0) == 0x00000000_00000004 -; run: %ishl_i64_i128(0x00000000_00000004, 65, 0) == 0x00000000_00000008 -; run: %ishl_i64_i128(0x00000000_00000004, 66, 1) == 0x00000000_00000010 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i128(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i128(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i128(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 -function %ishl_i32_i128(i32, i64, i64) -> i32 { 
-block0(v0: i32, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i32 v0, v3 +function %ishl_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i64(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i64(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i64(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i32(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 0) == 
0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i32(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i32(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i16(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i16(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i16(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i16(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i16(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: 
%ishl_i128_i16(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i8(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i8(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i8(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + + +function %ishl_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ishl.i64 v0, v1 + return v2 +} +; run: %ishl_i64_i128(0x00000000_00000000, 0) == 0x00000000_00000000 +; run: %ishl_i64_i128(0x00000000_00000000, 1) == 0x00000000_00000000 +; run: %ishl_i64_i128(0x0000000f_0000000f, 4) 
== 0x000000f0_000000f0 +; run: %ishl_i64_i128(0x00000000_00000004, 64) == 0x00000000_00000004 +; run: %ishl_i64_i128(0x00000000_00000004, 65) == 0x00000000_00000008 +; run: %ishl_i64_i128(0x0000000f_0000000f, 0x00000000_00000004_00000000_00000000) == 0x0000000f_0000000f +; run: %ishl_i64_i128(0x00000000_00000004, 0x00000000_00000001_00000000_00000042) == 0x00000000_00000010 + +function %ishl_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = ishl.i32 v0, v1 + return v2 +} +; run: %ishl_i32_i128(0x00000000, 0) == 0x00000000 +; run: %ishl_i32_i128(0x00000000, 1) == 0x00000000 +; run: %ishl_i32_i128(0x0000000f, 4) == 0x000000f0 +; run: %ishl_i32_i128(0x00000004, 32) == 0x00000004 +; run: %ishl_i32_i128(0x00000004, 33) == 0x00000008 +; run: %ishl_i32_i128(0x0000000f, 0x00000000_00000004_00000000_00000000) == 0x0000000f +; run: %ishl_i32_i128(0x00000004, 0x00000000_00000001_00000000_00000022) == 0x00000010 + + + +function %ushr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 + + v4 = ushr.i128 v0, v3 return v4 } -; run: %ishl_i32_i128(0x00000000, 0, 0) == 0x00000000 -; run: %ishl_i32_i128(0x00000000, 1, 0) == 0x00000000 -; run: %ishl_i32_i128(0x0000000f, 0, 4) == 0x0000000f -; run: %ishl_i32_i128(0x0000000f, 4, 0) == 0x000000f0 -; run: %ishl_i32_i128(0x00000004, 32, 0) == 0x00000004 -; run: %ishl_i32_i128(0x00000004, 33, 0) == 0x00000008 -; run: %ishl_i32_i128(0x00000004, 34, 1) == 0x00000010 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i128(0x00000000_00000000_00000000_00000001, 0) == 
0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i128(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 - - -function %ushr_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 - - v6 = ushr.i128 v3, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %ushr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i128(0x12340000_00000000, 
0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i64(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i64(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] 
-; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i64(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i64(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i32(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i32(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; 
run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i32(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i32(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: 
%ushr_i128_i16(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i16(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i16(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i16(i64, i64, i16) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i16(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i16(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 0) == 
[0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i8(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i8(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ushr.i64 v0, v1 + return v2 } -; run: %ushr_i128_i8(0x01010101_01010101, 
0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] - -function %ushr_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ushr.i64 v0, v3 - return v4 -} -; run: %ushr_i64_i128(0x10000000_10000000, 0, 0) == 0x10000000_10000000 -; run: %ushr_i64_i128(0x10000000_10000000, 1, 0) == 0x08000000_08000000 -; run: %ushr_i64_i128(0xf0000000_f0000000, 0, 4) == 0xf0000000_f0000000 -; run: %ushr_i64_i128(0xf0000000_f0000000, 4, 0) == 0x0f000000_0f000000 -; run: %ushr_i64_i128(0x40000000_40000000, 64, 0) == 0x40000000_40000000 -; run: %ushr_i64_i128(0x40000000_40000000, 65, 0) == 0x20000000_20000000 -; run: %ushr_i64_i128(0x40000000_40000000, 66, 1) == 0x10000000_10000000 +; run: %ushr_i64_i128(0x10000000_10000000, 0) == 0x10000000_10000000 +; run: %ushr_i64_i128(0x10000000_10000000, 1) == 
0x08000000_08000000 +; run: %ushr_i64_i128(0xf0000000_f0000000, 4) == 0x0f000000_0f000000 +; run: %ushr_i64_i128(0x40000000_40000000, 64) == 0x40000000_40000000 +; run: %ushr_i64_i128(0x40000000_40000000, 65) == 0x20000000_20000000 +; run: %ushr_i64_i128(0xf0000000_f0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000_f0000000 +; run: %ushr_i64_i128(0x40000000_40000000, 0x00000000_00000001_00000000_00000042) == 0x10000000_10000000 function %ushr_i32_i128(i32, i64, i64) -> i32 { block0(v0: i32, v1: i64, v2: i64): @@ -283,130 +244,110 @@ block0(v0: i32, v1: i64, v2: i64): -function %sshr_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 +function %sshr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = sshr.i128 v3, v5 + v4 = sshr.i128 v0, v3 - v7, v8 = isplit v6 - return v7, v8 -} -; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i128(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - 
-function %sshr_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i64(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i32(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: 
%sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i128_i16(i64, i64, i16) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i16(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i16(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i16(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - -function %sshr_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: 
%sshr_i128_i8(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i64 v0, v3 return v4 } -; run: %sshr_i64_i128(0x80000000_80000000, 0, 0) == 0x80000000_80000000 -; run: %sshr_i64_i128(0x80000000_80000000, 1, 0) == 0xC0000000_40000000 -; run: %sshr_i64_i128(0xf0000000_f0000000, 0, 4) == 0xf0000000_f0000000 -; run: %sshr_i64_i128(0xf0000000_f0000000, 4, 0) == 0xff000000_0f000000 -; run: %sshr_i64_i128(0x40000000_40000000, 64, 0) == 0x40000000_40000000 -; run: %sshr_i64_i128(0x40000000_40000000, 65, 0) == 0x20000000_20000000 -; run: %sshr_i64_i128(0x40000000_40000000, 66, 1) == 0x10000000_10000000 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i128(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i128(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; 
run: %sshr_i128_i128(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 -function %sshr_i32_i128(i32, i64, i64) -> i32 { -block0(v0: i32, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i32 v0, v3 - return v4 +function %sshr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = sshr.i128 v0, v1 + return v2 } -; run: %sshr_i32_i128(0x80000000, 0, 0) == 0x80000000 -; run: %sshr_i32_i128(0x80000000, 1, 0) == 0xC0000000 -; run: %sshr_i32_i128(0xf0000000, 0, 4) == 0xf0000000 -; run: %sshr_i32_i128(0xf0000000, 4, 0) == 0xff000000 -; run: %sshr_i32_i128(0x40000000, 32, 0) == 0x40000000 -; run: %sshr_i32_i128(0x40000000, 33, 0) == 0x20000000 -; run: %sshr_i32_i128(0x40000000, 34, 1) == 0x10000000 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i64(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i64(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i64(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 129) == 
0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i32(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i32(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i32(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i16(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i16(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i16(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: 
%sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + +function %sshr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i8(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i8(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i8(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = sshr.i64 v0, v1 + return v2 +} +; run: %sshr_i64_i128(0x80000000_80000000, 0) == 0x80000000_80000000 +; run: %sshr_i64_i128(0x80000000_80000000, 1) == 0xC0000000_40000000 +; run: %sshr_i64_i128(0xf0000000_f0000000, 4) == 0xff000000_0f000000 +; run: %sshr_i64_i128(0x40000000_40000000, 64) == 0x40000000_40000000 +; run: %sshr_i64_i128(0x40000000_40000000, 65) == 0x20000000_20000000 +; run: %sshr_i64_i128(0xf0000000_f0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000_f0000000 +; run: %sshr_i64_i128(0x40000000_40000000, 
0x00000000_00000001_00000000_00000042) == 0x10000000_10000000 + +function %sshr_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + return v2 +} +; run: %sshr_i32_i128(0x80000000, 0) == 0x80000000 +; run: %sshr_i32_i128(0x80000000, 1) == 0xC0000000 +; run: %sshr_i32_i128(0xf0000000, 4) == 0xff000000 +; run: %sshr_i32_i128(0x40000000, 32) == 0x40000000 +; run: %sshr_i32_i128(0x40000000, 33) == 0x20000000 +; run: %sshr_i32_i128(0xf0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000 +; run: %sshr_i32_i128(0x40000000, 0x00000000_00000001_00000000_00000022) == 0x10000000 From 3a4ebd772700deb77e5da2fa310e10203ba4ec9d Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 19 Sep 2021 15:00:41 +0100 Subject: [PATCH 13/93] cranelift: Deduplicate match_imm functions Transforming this into a generic function is proving to be a challenge since most of the necessary methods are not in a trait. We also need to cast between the signed and unsigned types, which is difficult to do in a generic function. This can be solved for example by adding the num crate as a dependency. But adding a dependency just to solve this issue seems a bit much. --- cranelift/reader/src/parser.rs | 207 ++++++++------------------------- 1 file changed, 50 insertions(+), 157 deletions(-) diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index a0e5a35e3a..a8f20230fc 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -31,6 +31,51 @@ use std::str::FromStr; use std::{u16, u32}; use target_lexicon::Triple; +macro_rules! match_imm { + ($signed:ty, $unsigned:ty, $parser:expr, $err_msg:expr) => {{ + if let Some(Token::Integer(text)) = $parser.token() { + $parser.consume(); + let negative = text.starts_with('-'); + let positive = text.starts_with('+'); + let text = if negative || positive { + // Strip sign prefix. + &text[1..] 
+ } else { + text + }; + + // Parse the text value; the lexer gives us raw text that looks like an integer. + let value = if text.starts_with("0x") { + // Skip underscores. + let text = text.replace("_", ""); + // Parse it in hexadecimal form. + <$unsigned>::from_str_radix(&text[2..], 16).map_err(|_| { + $parser.error("unable to parse value as a hexadecimal immediate") + })? + } else { + // Parse it as a signed type to check for overflow and other issues. + text.parse() + .map_err(|_| $parser.error("expected decimal immediate"))? + }; + + // Apply sign if necessary. + let signed = if negative { + let value = value.wrapping_neg() as $signed; + if value > 0 { + return Err($parser.error("negative number too small")); + } + value + } else { + value as $signed + }; + + Ok(signed) + } else { + err!($parser.loc, $err_msg) + } + }}; +} + /// After some quick benchmarks a program should never have more than 100,000 blocks. const MAX_BLOCKS_IN_A_FUNCTION: u32 = 100_000; @@ -793,175 +838,23 @@ impl<'a> Parser<'a> { // Match and consume an i8 immediate. fn match_imm8(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i8 in hexadecimal form. - u8::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i8 as a hexadecimal immediate"))? - } else { - // Parse it as a i8 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i8 decimal immediate"))? - }; - - // Apply sign if necessary. 
- let signed = if negative { - let value = value.wrapping_neg() as i8; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i8 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i8, u8, self, err_msg) } // Match and consume a signed 16-bit immediate. fn match_imm16(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i16 in hexadecimal form. - u16::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i16 as a hexadecimal immediate"))? - } else { - // Parse it as a i16 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i16 decimal immediate"))? - }; - - // Apply sign if necessary. - let signed = if negative { - let value = value.wrapping_neg() as i16; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i16 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i16, u16, self, err_msg) } // Match and consume an i32 immediate. // This is used for stack argument byte offsets. fn match_imm32(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. 
- let text = text.replace("_", ""); - // Parse it as a i32 in hexadecimal form. - u32::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i32 as a hexadecimal immediate"))? - } else { - // Parse it as a i32 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i32 decimal immediate"))? - }; - - // Apply sign if necessary. - let signed = if negative { - let value = value.wrapping_neg() as i32; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i32 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i32, u32, self, err_msg) } // Match and consume an i128 immediate. fn match_imm128(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i128 in hexadecimal form. - u128::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i128 as a hexadecimal immediate"))? - } else { - // Parse it as a i128 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i128 decimal immediate"))? - }; - - // Apply sign if necessary. - let signed = if negative { - let value = value.wrapping_neg() as i128; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i128 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i128, u128, self, err_msg) } // Match and consume an optional offset32 immediate. 
@@ -2848,7 +2741,7 @@ impl<'a> Parser<'a> { I16 => DataValue::from(self.match_imm16("expected an i16")?), I32 => DataValue::from(self.match_imm32("expected an i32")?), I64 => DataValue::from(Into::::into(self.match_imm64("expected an i64")?)), - I128 => DataValue::from(self.match_imm128("expected an i64")?), + I128 => DataValue::from(self.match_imm128("expected an i128")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), _ if ty.is_vector() => { From 3fc29f5f6c9e6c41c4c85ecc16dab0ddf808d6d7 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Mon, 20 Sep 2021 09:57:19 +0100 Subject: [PATCH 14/93] Return `u128` from `bounds`; form `new_vec` from iter chain Copyright (c) 2021, Arm Limited --- cranelift/codegen/src/ir/types.rs | 23 ++++++++++++----------- cranelift/interpreter/src/step.rs | 16 ++++++++++------ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index 2d9c7e709e..709c5bb08d 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -80,23 +80,24 @@ impl Type { } /// Get the (minimum, maximum) values represented by each lane in the type. - pub fn bounds(self, signed: bool) -> (i128, i128) { + /// Note that these are returned as unsigned 'bit patterns'. 
+ pub fn bounds(self, signed: bool) -> (u128, u128) { if signed { match self.lane_type() { - I8 => (i8::MIN as i128, i8::MAX as i128), - I16 => (i16::MIN as i128, i16::MAX as i128), - I32 => (i32::MIN as i128, i32::MAX as i128), - I64 => (i64::MIN as i128, i64::MAX as i128), - I128 => (i128::MIN, i128::MAX), + I8 => (i8::MIN as u128, i8::MAX as u128), + I16 => (i16::MIN as u128, i16::MAX as u128), + I32 => (i32::MIN as u128, i32::MAX as u128), + I64 => (i64::MIN as u128, i64::MAX as u128), + I128 => (i128::MIN as u128, i128::MAX as u128), _ => unimplemented!(), } } else { match self.lane_type() { - I8 => (u8::MIN as i128, u8::MAX as i128), - I16 => (u16::MIN as i128, u16::MAX as i128), - I32 => (u32::MIN as i128, u32::MAX as i128), - I64 => (u64::MIN as i128, u64::MAX as i128), - I128 => (u128::MIN as i128, u128::MAX as i128), + I8 => (u8::MIN as u128, u8::MAX as u128), + I16 => (u16::MIN as u128, u16::MAX as u128), + I32 => (u32::MIN as u128, u32::MAX as u128), + I64 => (u64::MIN as u128, u64::MAX as u128), + I128 => (u128::MIN, u128::MAX), _ => unimplemented!(), } } diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 6a86eb1cb6..31f85a8bdc 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -782,16 +782,15 @@ where Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => { let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; - let mut new_vec = SimdVec::new(); let new_type = ctrl_ty.split_lanes().unwrap(); let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow); - let mut min: V = Value::int(min, ctrl_ty.lane_type())?; - let mut max: V = Value::int(max, ctrl_ty.lane_type())?; + let mut min: V = Value::int(min as i128, ctrl_ty.lane_type())?; + let mut max: V = Value::int(max as i128, ctrl_ty.lane_type())?; if inst.opcode() == Opcode::Uunarrow { min = min.convert(ValueConversionKind::ToUnsigned)?; max = 
max.convert(ValueConversionKind::ToUnsigned)?; } - for mut lane in arg0.into_iter().chain(arg1) { + let narrow = |mut lane: V| -> ValueResult { if inst.opcode() == Opcode::Uunarrow { lane = lane.convert(ValueConversionKind::ToUnsigned)?; } @@ -801,8 +800,13 @@ where if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow { lane = lane.convert(ValueConversionKind::ToUnsigned)?; } - new_vec.push(lane); - } + Ok(lane) + }; + let new_vec = arg0 + .into_iter() + .chain(arg1) + .map(|lane| narrow(lane)) + .collect::>>()?; assign(vectorizelanes(&new_vec, new_type)?) } Opcode::Sextend => assign(Value::convert( From 8abb19cbd81a0f4562e3de2ed1e668f734879bce Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Mon, 20 Sep 2021 10:31:34 +0100 Subject: [PATCH 15/93] Generate `new_vec` using an iterator chain Copyright (c) 2021, Arm Limited --- cranelift/interpreter/src/step.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 5157084330..4efc5b88d7 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -850,14 +850,19 @@ where let new_type = ctrl_ty.merge_lanes().unwrap(); let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; - let mut new_vec = SimdVec::new(); - for (x, y) in arg0.chunks(2).into_iter().zip(arg1.chunks(2).into_iter()) { - let mut z = 0i128; - for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { - z += lhs.clone().into_int()? * rhs.clone().into_int()?; - } - new_vec.push(Value::int(z, new_type.lane_type())?); - } + let new_vec = arg0 + .chunks(2) + .into_iter() + .zip(arg1.chunks(2)) + .into_iter() + .map(|(x, y)| { + let mut z = 0i128; + for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { + z += lhs.clone().into_int()? 
* rhs.clone().into_int()?; + } + Value::int(z, new_type.lane_type()) + }) + .collect::>>()?; assign(vectorizelanes(&new_vec, new_type)?) } Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), From 7c5acfa96c450dc4f665eb315d2428374d87b55e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 20 Sep 2021 16:33:16 +0200 Subject: [PATCH 16/93] Add QEMU CI runner for the s390x architecture * Add QEMU CI runner for s390x * Disable lightbeam tests for s390x --- .github/workflows/main.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf9e849931..24c41b1b0b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -236,6 +236,12 @@ jobs: qemu_target: aarch64-linux-user # FIXME(#3183) shouldn't be necessary to specify this qemu_flags: -cpu max,pauth=off + - os: ubuntu-latest + target: s390x-unknown-linux-gnu + gcc_package: gcc-s390x-linux-gnu + gcc: s390x-linux-gnu-gcc + qemu: qemu-s390x -L /usr/s390x-linux-gnu + qemu_target: s390x-linux-user steps: - uses: actions/checkout@v2 with: @@ -326,9 +332,9 @@ jobs: # Build and test lightbeam. Note that # Lightbeam tests fail right now, but we don't want to block on that. 
- run: cargo build --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' + if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' - run: cargo test --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' + if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' continue-on-error: true env: RUST_BACKTRACE: 1 From a8467d082476169c8e3f858823390fe4135626ae Mon Sep 17 00:00:00 2001 From: Advance Software Date: Mon, 20 Sep 2021 18:33:20 +0100 Subject: [PATCH 17/93] =?UTF-8?q?Exports=20symbols=20to=20be=20shared=20wi?= =?UTF-8?q?th=20external=20GDB/JIT=20debugging=20interfac=E2=80=A6=20(#337?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Exports symbols to be shared with external GDB/JIT debugging interface tools. Windows O/S specific requirement. * Moved comments into platform specific compiler directive sections. --- crates/runtime/src/helpers.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/crates/runtime/src/helpers.c b/crates/runtime/src/helpers.c index daf47dda11..66b87a150b 100644 --- a/crates/runtime/src/helpers.c +++ b/crates/runtime/src/helpers.c @@ -61,14 +61,16 @@ void wasmtime_longjmp(void *JmpBuf) { platform_longjmp(*buf, 1); } -// Just in case cross-language LTO is enabled we set the `noinline` attribute -// and also try to have some sort of side effect in this function with a dummy -// `asm` statement. -// -// Note the `weak` linkage here, though, which is intended to let other code -// override this symbol if it's defined elsewhere, since this definition doesn't -// matter. -#ifndef CFG_TARGET_OS_windows +#ifdef CFG_TARGET_OS_windows + // export required for external access. 
+__declspec(dllexport) +#else + // Note the `weak` linkage here, though, which is intended to let other code + // override this symbol if it's defined elsewhere, since this definition doesn't + // matter. + // Just in case cross-language LTO is enabled we set the `noinline` attribute + // and also try to have some sort of side effect in this function with a dummy + // `asm` statement. __attribute__((weak, noinline)) #endif void __jit_debug_register_code() { @@ -84,14 +86,19 @@ struct JITDescriptor { void* first_entry_; }; -// Note the `weak` linkage here which is the same purpose as above. We want to -// let other runtimes be able to override this since our own definition isn't -// important. -#ifndef CFG_TARGET_OS_windows -__attribute__((weak)) +#ifdef CFG_TARGET_OS_windows + // export required for external access. + __declspec(dllexport) +#else + // Note the `weak` linkage here which is the same purpose as above. We want to + // let other runtimes be able to override this since our own definition isn't + // important. + __attribute__((weak)) #endif struct JITDescriptor __jit_debug_descriptor = {1, 0, NULL, NULL}; + + struct JITDescriptor* wasmtime_jit_debug_descriptor() { return &__jit_debug_descriptor; } From 5d3012d8f0fc5b01b35e3c17a023c7bf4e2ebf5e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 20 Sep 2021 12:34:27 -0500 Subject: [PATCH 18/93] Cranelift 9/20 meeting notes (#3374) * Cranelift 9/20 meeting notes * Update cranelift-09-20.md --- meetings/cranelift/2021/cranelift-09-20.md | 188 ++++++++++++++++++++- 1 file changed, 186 insertions(+), 2 deletions(-) diff --git a/meetings/cranelift/2021/cranelift-09-20.md b/meetings/cranelift/2021/cranelift-09-20.md index f584dbede7..4a54f18cb7 100644 --- a/meetings/cranelift/2021/cranelift-09-20.md +++ b/meetings/cranelift/2021/cranelift-09-20.md @@ -13,8 +13,192 @@ 1. Other agenda items 1. 
Further discussion of ISLE, the proposed instruction selector DSL, as outlined in [RFC #15](https://github.com/bytecodealliance/rfcs/pull/15) -## Notes - ### Attendees +* cfallin +* fitzgen +* uweigand +* alexcrichton +* bjorn3 +* afonso360 +* akirilov +* jlbirch + ### Notes + +#### ISLE + +* cfallin - Hoping bbouvier could be here but conflict! Will catch up later. Anyone + have any concerns to discuss? +* jlbirch - what were concerns? +* cfallin - not necessarily as "simple" as what we have right now. Currently + everything is straightforward to jump in and "see" Rust code. Tracing things + is easy and you can see what to modify. Also works with IDEs and rust-analyzer + and other Rust tools. bbouvier wants to preserve this if possible since it's + open and inviting, minimal knowledge required. Downside is that the benefits + of the DSL aren't there, fitzgen and I mentioned on thread. There are + some things we can only do with a DSL such as verification, optimizations, + refactorings (new regalloc API), ... I think it's also more open and welcoming + if you can understand the patterns and see them, that way you don't track + subtle invariants from custom open-coding. More welcoming with custom language + or to have Rust to read? +* fitzgen - The goal of the DSL should be to think about the domain at hand + rather than the low-level bits and I think it does a good job of that. If you + see some lowering is missing adding the new operation should be writing a + pattern and just focusing on the pattern, not also how it fits into the + hand-written matcher-trees. With the DSL compiler handling all that it's nice + that it handles performance (optimized lowering) but you're also just talking + about the pattern you want to match instead of manually doing the lowering. +* cfallin - aspect oriented programming anyone?
The DSL brings all the little + things spread throughout the code into one place in the compiler -- raising + the level of abstraction and not having to worry about doing unsafe or illegal + things. Understand Ben's concerns though. Anyone else have similar concerns? +* akirilov - I'm in the middle, leaning towards what you're describing with + ISLE. +* jlbirch - Worked on compilers awhile ago! No DSL involved. Mostly with + bytecodealliance I've seen DSLs. Would ISLE look similar to the wasm backend + for v8? +* cfallin - not familiar with v8, but you/Andrew have described open-coding, is + that right? +* jlbirch - looking at a lowering language of some sort +* cfallin - link? +* jlbirch - should be able to compare what we have to v8 and how it's easy to + look at and dive in. Haven't had experience debugging v8 though and that's + presumably where the issues come in. +* cfallin - speaks to a tooling concern and trace what some code becomes and + why. The output of the DSL should be human readable and should ideally have + comments for where things came from. Does this in the prototype, not beautiful + code but still readable. Has comments though and says "this came from ISLE + line 123". Should be able to step through and see various cases. Maybe higher + level thing like log/trace to show what was applied? I understand the + debugging point though, very important. +* fitzgen - regarding what other compilers do, gcc has its own DSL, LLVM has + tablegen, Go has a DSL for this sort of thing. ISLE does have unique things + but this shouldn't be too too foreign. +* cfallin - "term rewriting system" - studied for awhile -- not to say it's + automatically easier. Is understood though. +* jlbirch - Yeah understand it's not too too crazy. Trying to imagine someone + with no experience in compilers jumping in. +* cfallin - Trying to prevent bugs that have come in with ISLE preventing things + from being incorrect.
Lots of stuff to worry about today with + regalloc/metadata/sinking loads/etc. Extra mental state we don't want authors + to have to carry with ISLE. +* jlbirch - generally agree +* cfallin - should catch up with Ben later. Sounds like others agree? +* akirilov - haven't touched the old backend which seems like it has a somewhat + similar DSL. Would be good to have a guide for how to add a new instruction. + Main challenge is that there's no guide right now and would be helpful to + have. Good to know how to add one instruction and to debug. +* cfallin - good idea! +* akirilov - ideally information is close to the project (as opposed to blog + posts, which are great!) since contributors may not always be aware of + articles. We have Wasmtime guide with section for contributing? Doesn't cover + Cranelift though. +* cfallin - Whole `docs` repo to write stuff into, would be great to do. +* fitzgen - would be good to have Cranelift-specific book. +* cfallin - yes! +* akirilov - should link from the Wasmtime book since it appears at the top of + the repository. Cranelift should be visible too. +* cfallin - agreed! Should document new instructions, new lowerings. Could + probably source from RFCs and such. +* cfallin - brief mention of progress. The prototype of ISLE exists and they can + dig into it. Happy to explain more in a call. Nick is going to try to carry + forward and implement more things end-to-end with polish. Nick? +* fitzgen - Plan is to get one lowering implemented all the way through with + ISLE and then try ISLE first in existing lowering, falling back to handwritten + thing. Afterwards knocking out all the patterns. Probably still a week or so. + This'll quickly be parallelizable where it's mostly just porting patterns, + talk to me! 
+* jlbirch - will do my homework and reread these issues and will take you both + up on the offer and plan to help out +* fitzgen - Looking at pattern -> Rust code translation was very helpful and + gave me confidence that it's doing what it should do. Confident approach is + nice and could understand well that what I'm doing maps well. +* cfallin - any other thoughts on ISLE? + + +#### Endianness + +* cfallin - Thank you s390x for making sure we're correct here! Consensus last + year we have tri-state approach, we have a little/big/native flag on all + loads/stores. Native important for interacting with the rest of the system. + Concern that with the interpreter that this makes clif behavior + platform-dependent. Should have a single defined semantics for clif to prevent + breaking things up the stack. The suggestion in the issue is that we + reconsider this and go back to a world where have explicit endian on + everything, and for native things we bake it in based on the knowledge when we + generate the clif. Basic approach is to do what other compilers do like LLVM + with early-bind rather than late-bind. Any objections? +* akirilov - agree! Especially about clif semantics I agree we don't want them + dependent on the interpreter's host platform. +* cfallin - ok sounds like not much controversy. Sticky point is the API change. + When you create a clif function or you get a builder you need to give a notion + of endianness if not more platform information. Corresponds to LLVM which has + datalayout at the top of every file. Don't think that this will break things + other than that it's an API change which you should already know. +* uweigand - Confused about how other IRs have been created from the start for a + particular target and will build different IR for different targets. Have to + know the target for the IR to do anything with it anyway. Need to keep + specifying the same target when working with the same file. Or LLVM annotates + at the top. 
Sometimes datalayout also has target too. Having an interpreter + which doesn't know the intended target will really fully work even if + endianness is fixed. Won't there be other reasons? +* cfallin - one distinction is that the specific target is less important and + more important about details like endianness. Native loads/stores defined by + this. Pointer width can also be important. If you give me x86 IR it should be + possible to in theory compile on a 64-bit big-endian system with byteswaps? +* uweigand - don't have a full overview of the IR, but wondering if we have + things like pointer offsets which change offsets and such? +* cfallin - not in the same way of LLVM, the code producer might assume this and + we may want to check. How would this work. If we're lowering heap_addr on + 64-bit system from IR targeted from 32-bit system maybe... +* uweigand - the specific question seems fine here, tried last year and it + seemed possible. Lots of code to change though, including code outside of the + Cranelift repository. +* cfallin - no API stability right now though. Folks here produce clif IR so + this would impact you. Providing endianness to a builder isn't the biggest + dealbreaker though since it's often ... +* fitzgen - to what uweigand said, the front/middle generate different IR + depending on the backend. Are we doing this today in Cranelift? All else being + equal it would be nice if we always generated the same IR. Some issues with + endianness though. I liked your recent comment of setting the endianness once + and still a tri-state native option, but native is explicitly defined. If the + declaration of what native is the only thing that's different that seems like + a nice property. +* uweigand - to clarify I wasn't referring to Cranelift, referring to compilers + like gcc/LLVM where it generates different IR since IR already encodes + features like the calling convention, struct sizes, etc.
+* cfallin - we don't have calling convention details but we do have struct + layout depending on what the producer is doing. No concept of struct in + cranelift, but we do have producers that compile structs. Pointer-width fields + have different sizes. +- acrichto - wasmtime has platform dependent things for VMContext yeah +- afonso - control type for stack\_addr and such is pointer type +- cfallin - if you use 32-bit on 64-bit should be compiled correctly? Would be + nice to be independent but there's lots of details +- fitzgen - imagining on the cranelift-wasm frontend it's the same +- cfallin - I think it's already true except argument loads/stores? +- uweigand - other way around. All loads/stores have explicit endianness. More + places "leave native" than use little. Most probably use little-endian though + since it's wasm. +- cfallin - almost have this property nick? maybe don't enforce? +- fitzgen - we have environment traits which customize things we want different. + Not mad about hook points for those using the frontend. If cranelift-wasm + decides to ask about the current platform and change the lowering that feels + bad. +- cfallin - agreed that's bad. This should be used to make behavior + deterministic. Don't have other platform-specific properties. +- bjorn3 - how test native loads/stores? +- cfallin - different tests? No duplication? Not sure I understand. +- cfallin - other points? Ok sounds like a reasonable approach, Afonso would you + like to try this? +- afonso - Will probably need guidance but happy to try. + +#### Status + +- cfallin: ISLE! +- uweigand: s390x - final patches merged and testsuite passes out-of-the-box. PR + to add s390x to CI. As we were speaking the run finished! +- akirilov: looking into pointer authentication support and code branch target + indication. These two are related. Just starting to work on an RFC since + this will probably merit discussions.
From 3735453afa884d56b351bacd33376a5ca62e8001 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 20 Sep 2021 19:42:26 +0200 Subject: [PATCH 19/93] Add s390x build workflow (#3375) --- .github/workflows/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24c41b1b0b..e5a0ea5ca3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -440,6 +440,11 @@ jobs: target: aarch64-unknown-linux-gnu gcc_package: gcc-aarch64-linux-gnu gcc: aarch64-linux-gnu-gcc + - build: s390x-linux + os: ubuntu-latest + target: s390x-unknown-linux-gnu + gcc_package: gcc-s390x-linux-gnu + gcc: s390x-linux-gnu-gcc steps: - uses: actions/checkout@v2 with: From fc6328ae069f091a8cd53702d1f9876045d564e6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 20 Sep 2021 14:17:19 -0500 Subject: [PATCH 20/93] Temporarily disable SIMD fuzzing on CI (#3376) We've got a large crop of fuzz-bugs from fuzzing with enabled-with-SIMD on oss-fuzz but at this point the fuzz stats from oss-fuzz say that the fuzzers like v8 are spending less than 50% of its time actually fuzzing and presumably mostly hitting crashes and such. While we fix the other issues this disables simd for fuzzing with v8 so we can try to see if we can weed out other issues. --- crates/fuzzing/src/generators.rs | 6 ------ crates/fuzzing/src/lib.rs | 1 - fuzz/fuzz_targets/differential_v8.rs | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 361298ca3c..242a5bc3bb 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -142,12 +142,6 @@ impl wasm_smith::Config for WasmtimeDefaultConfig { 4 } - // Turn some wasm features default-on for those that have a finished - // implementation in Wasmtime. 
- fn simd_enabled(&self) -> bool { - true - } - fn reference_types_enabled(&self) -> bool { true } diff --git a/crates/fuzzing/src/lib.rs b/crates/fuzzing/src/lib.rs index 16b9441caf..4e93b52216 100644 --- a/crates/fuzzing/src/lib.rs +++ b/crates/fuzzing/src/lib.rs @@ -40,7 +40,6 @@ pub fn fuzz_default_config(strategy: wasmtime::Strategy) -> anyhow::Result> + wasm_smith::ConfiguredModule> )| { let (config, mut wasm) = data; wasm.module.ensure_termination(1000); From c7d595ae464d9731e7253e7c4eb716c04e68d217 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 14 Sep 2021 13:23:20 +0100 Subject: [PATCH 21/93] cranelift: Add `bextend` tests to interpreter --- .../filetests/filetests/runtests/bextend.clif | 84 +++++++++++++++++++ .../filetests/runtests/i128-bextend.clif | 42 ++++++++++ 2 files changed, 126 insertions(+) create mode 100644 cranelift/filetests/filetests/runtests/bextend.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-bextend.clif diff --git a/cranelift/filetests/filetests/runtests/bextend.clif b/cranelift/filetests/filetests/runtests/bextend.clif new file mode 100644 index 0000000000..9f78fd9d2b --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bextend.clif @@ -0,0 +1,84 @@ +test interpret + +function %bextend_b1_b8(b1) -> b8 { +block0(v0: b1): + v1 = bextend.b8 v0 + return v1 +} +; run: %bextend_b1_b8(true) == true +; run: %bextend_b1_b8(false) == false + +function %bextend_b1_b16(b1) -> b16 { +block0(v0: b1): + v1 = bextend.b16 v0 + return v1 +} +; run: %bextend_b1_b16(true) == true +; run: %bextend_b1_b16(false) == false + +function %bextend_b1_b32(b1) -> b32 { +block0(v0: b1): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b1_b32(true) == true +; run: %bextend_b1_b32(false) == false + +function %bextend_b1_b64(b1) -> b64 { +block0(v0: b1): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b1_b64(true) == true +; run: %bextend_b1_b64(false) == false + + +function %bextend_b8_b16(b8) -> b16 { +block0(v0: b8): 
+ v1 = bextend.b16 v0 + return v1 +} +; run: %bextend_b8_b16(true) == true +; run: %bextend_b8_b16(false) == false + +function %bextend_b8_b32(b8) -> b32 { +block0(v0: b8): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b8_b32(true) == true +; run: %bextend_b8_b32(false) == false + +function %bextend_b8_b64(b8) -> b64 { +block0(v0: b8): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b8_b64(true) == true +; run: %bextend_b8_b64(false) == false + + +function %bextend_b16_b32(b16) -> b32 { +block0(v0: b16): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b16_b32(true) == true +; run: %bextend_b16_b32(false) == false + +function %bextend_b16_b64(b16) -> b64 { +block0(v0: b16): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b16_b64(true) == true +; run: %bextend_b16_b64(false) == false + + +function %bextend_b32_b64(b32) -> b64 { +block0(v0: b32): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b32_b64(true) == true +; run: %bextend_b32_b64(false) == false diff --git a/cranelift/filetests/filetests/runtests/i128-bextend.clif b/cranelift/filetests/filetests/runtests/i128-bextend.clif new file mode 100644 index 0000000000..34372f98e4 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bextend.clif @@ -0,0 +1,42 @@ +test interpret + +function %bextend_b1_b128(b1) -> b128 { +block0(v0: b1): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b1_b128(true) == true +; run: %bextend_b1_b128(false) == false + +function %bextend_b8_b128(b8) -> b128 { +block0(v0: b8): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b8_b128(true) == true +; run: %bextend_b8_b128(false) == false + +function %bextend_b16_b128(b16) -> b128 { +block0(v0: b16): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b16_b128(true) == true +; run: %bextend_b16_b128(false) == false + +function %bextend_b32_b128(b32) -> b128 { +block0(v0: b32): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b32_b128(true) == true +; run: 
%bextend_b32_b128(false) == false + + +function %bextend_b64_b128(b64) -> b128 { +block0(v0: b64): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b64_b128(true) == true +; run: %bextend_b64_b128(false) == false From 3ee180420ec268f4fd3d8ee8dfc276762f1c4848 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 14 Sep 2021 13:33:15 +0100 Subject: [PATCH 22/93] cranelift: Add `breduce` tests to interpreter --- .../filetests/filetests/runtests/breduce.clif | 85 +++++++++++++++++++ .../filetests/runtests/i128-breduce.clif | 41 +++++++++ 2 files changed, 126 insertions(+) create mode 100644 cranelift/filetests/filetests/runtests/breduce.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-breduce.clif diff --git a/cranelift/filetests/filetests/runtests/breduce.clif b/cranelift/filetests/filetests/runtests/breduce.clif new file mode 100644 index 0000000000..e436b3f800 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/breduce.clif @@ -0,0 +1,85 @@ +test interpret + +function %breduce_b8_b1(b8) -> b1 { +block0(v0: b8): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b8_b1(true) == true +; run: %breduce_b8_b1(false) == false + + +function %breduce_b16_b1(b16) -> b1 { +block0(v0: b16): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b16_b1(true) == true +; run: %breduce_b16_b1(false) == false + +function %breduce_b16_b8(b16) -> b8 { +block0(v0: b16): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b16_b8(true) == true +; run: %breduce_b16_b8(false) == false + + +function %breduce_b32_b1(b32) -> b1 { +block0(v0: b32): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b32_b1(true) == true +; run: %breduce_b32_b1(false) == false + +function %breduce_b32_b8(b32) -> b8 { +block0(v0: b32): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b32_b8(true) == true +; run: %breduce_b32_b8(false) == false + +function %breduce_b32_b16(b32) -> b16 { +block0(v0: b32): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b32_b16(true) 
== true +; run: %breduce_b32_b16(false) == false + + + +function %breduce_b64_b1(b64) -> b1 { +block0(v0: b64): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b64_b1(true) == true +; run: %breduce_b64_b1(false) == false + +function %breduce_b64_b8(b64) -> b8 { +block0(v0: b64): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b64_b8(true) == true +; run: %breduce_b64_b8(false) == false + +function %breduce_b64_b16(b64) -> b16 { +block0(v0: b64): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b64_b16(true) == true +; run: %breduce_b64_b16(false) == false + +function %breduce_b64_b32(b64) -> b32 { +block0(v0: b64): + v1 = breduce.b32 v0 + return v1 +} +; run: %breduce_b64_b32(true) == true +; run: %breduce_b64_b32(false) == false diff --git a/cranelift/filetests/filetests/runtests/i128-breduce.clif b/cranelift/filetests/filetests/runtests/i128-breduce.clif new file mode 100644 index 0000000000..93efa6c7a6 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-breduce.clif @@ -0,0 +1,41 @@ +test interpret + +function %breduce_b128_b1(b128) -> b1 { +block0(v0: b128): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b128_b1(true) == true +; run: %breduce_b128_b1(false) == false + +function %breduce_b128_b8(b128) -> b8 { +block0(v0: b128): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b128_b8(true) == true +; run: %breduce_b128_b8(false) == false + +function %breduce_b128_b16(b128) -> b16 { +block0(v0: b128): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b128_b16(true) == true +; run: %breduce_b128_b16(false) == false + +function %breduce_b128_b32(b128) -> b32 { +block0(v0: b128): + v1 = breduce.b32 v0 + return v1 +} +; run: %breduce_b128_b32(true) == true +; run: %breduce_b128_b32(false) == false + +function %breduce_b128_b64(b128) -> b64 { +block0(v0: b128): + v1 = breduce.b64 v0 + return v1 +} +; run: %breduce_b128_b64(true) == true +; run: %breduce_b128_b64(false) == false From 9a95ce75f15d9d2b8dcc542602c43f4cf2bc5a19 Mon 
Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 14 Sep 2021 14:58:08 +0100 Subject: [PATCH 23/93] cranelift: Add `bmask` to interpreter --- .../filetests/filetests/runtests/bmask.clif | 161 ++++++++++++++++++ .../filetests/runtests/i128-bmask.clif | 82 +++++++++ .../filetests/runtests/simd-bmask.clif | 30 ++++ cranelift/interpreter/src/step.rs | 14 +- cranelift/interpreter/src/value.rs | 10 +- 5 files changed, 291 insertions(+), 6 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/bmask.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-bmask.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-bmask.clif diff --git a/cranelift/filetests/filetests/runtests/bmask.clif b/cranelift/filetests/filetests/runtests/bmask.clif new file mode 100644 index 0000000000..d68e59ec00 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bmask.clif @@ -0,0 +1,161 @@ +test interpret + +function %bmask_b64_i64(b64) -> i64 { +block0(v0: b64): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b64_i64(true) == -1 +; run: %bmask_b64_i64(false) == 0 + +function %bmask_b64_i32(b64) -> i32 { +block0(v0: b64): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b64_i32(true) == -1 +; run: %bmask_b64_i32(false) == 0 + +function %bmask_b64_i16(b64) -> i16 { +block0(v0: b64): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b64_i16(true) == -1 +; run: %bmask_b64_i16(false) == 0 + +function %bmask_b64_i8(b64) -> i8 { +block0(v0: b64): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b64_i8(true) == -1 +; run: %bmask_b64_i8(false) == 0 + +function %bmask_b32_i64(b32) -> i64 { +block0(v0: b32): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b32_i64(true) == -1 +; run: %bmask_b32_i64(false) == 0 + +function %bmask_b32_i32(b32) -> i32 { +block0(v0: b32): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b32_i32(true) == -1 +; run: %bmask_b32_i32(false) == 0 + +function %bmask_b32_i16(b32) -> i16 { +block0(v0: b32): + v1 = bmask.i16 v0 + 
return v1 +} +; run: %bmask_b32_i16(true) == -1 +; run: %bmask_b32_i16(false) == 0 + +function %bmask_b32_i8(b32) -> i8 { +block0(v0: b32): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b32_i8(true) == -1 +; run: %bmask_b32_i8(false) == 0 + +function %bmask_b16_i64(b16) -> i64 { +block0(v0: b16): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b16_i64(true) == -1 +; run: %bmask_b16_i64(false) == 0 + +function %bmask_b16_i32(b16) -> i32 { +block0(v0: b16): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b16_i32(true) == -1 +; run: %bmask_b16_i32(false) == 0 + +function %bmask_b16_i16(b16) -> i16 { +block0(v0: b16): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b16_i16(true) == -1 +; run: %bmask_b16_i16(false) == 0 + +function %bmask_b16_i8(b16) -> i8 { +block0(v0: b16): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b16_i8(true) == -1 +; run: %bmask_b16_i8(false) == 0 + +function %bmask_b8_i64(b8) -> i64 { +block0(v0: b8): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b8_i64(true) == -1 +; run: %bmask_b8_i64(false) == 0 + +function %bmask_b8_i32(b8) -> i32 { +block0(v0: b8): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b8_i32(true) == -1 +; run: %bmask_b8_i32(false) == 0 + +function %bmask_b8_i16(b8) -> i16 { +block0(v0: b8): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b8_i16(true) == -1 +; run: %bmask_b8_i16(false) == 0 + +function %bmask_b8_i8(b8) -> i8 { +block0(v0: b8): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b8_i8(true) == -1 +; run: %bmask_b8_i8(false) == 0 + +function %bmask_b1_i64(b1) -> i64 { +block0(v0: b1): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b1_i64(true) == -1 +; run: %bmask_b1_i64(false) == 0 + +function %bmask_b1_i32(b1) -> i32 { +block0(v0: b1): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b1_i32(true) == -1 +; run: %bmask_b1_i32(false) == 0 + +function %bmask_b1_i16(b1) -> i16 { +block0(v0: b1): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b1_i16(true) == -1 +; run: %bmask_b1_i16(false) == 
0 + +function %bmask_b1_i8(b1) -> i8 { +block0(v0: b1): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b1_i8(true) == -1 +; run: %bmask_b1_i8(false) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-bmask.clif b/cranelift/filetests/filetests/runtests/i128-bmask.clif new file mode 100644 index 0000000000..f87df7f2ab --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bmask.clif @@ -0,0 +1,82 @@ +test interpret + +function %bmask_b128_i128(b128) -> i128 { +block0(v0: b128): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b128_i128(true) == -1 +; run: %bmask_b128_i128(false) == 0 + +function %bmask_b128_i64(b128) -> i64 { +block0(v0: b128): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b128_i64(true) == -1 +; run: %bmask_b128_i64(false) == 0 + +function %bmask_b128_i32(b128) -> i32 { +block0(v0: b128): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b128_i32(true) == -1 +; run: %bmask_b128_i32(false) == 0 + +function %bmask_b128_i16(b128) -> i16 { +block0(v0: b128): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b128_i16(true) == -1 +; run: %bmask_b128_i16(false) == 0 + +function %bmask_b128_i8(b128) -> i8 { +block0(v0: b128): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b128_i8(true) == -1 +; run: %bmask_b128_i8(false) == 0 + + +function %bmask_b64_i128(b64) -> i128 { +block0(v0: b64): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b64_i128(true) == -1 +; run: %bmask_b64_i128(false) == 0 + +function %bmask_b32_i128(b32) -> i128 { +block0(v0: b32): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b32_i128(true) == -1 +; run: %bmask_b32_i128(false) == 0 + +function %bmask_b16_i128(b16) -> i128 { +block0(v0: b16): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b16_i128(true) == -1 +; run: %bmask_b16_i128(false) == 0 + +function %bmask_b8_i128(b8) -> i128 { +block0(v0: b8): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b8_i128(true) == -1 +; run: %bmask_b8_i128(false) == 0 + +function %bmask_b1_i128(b1) -> i128 { 
+block0(v0: b1): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b1_i128(true) == -1 +; run: %bmask_b1_i128(false) == 0 diff --git a/cranelift/filetests/filetests/runtests/simd-bmask.clif b/cranelift/filetests/filetests/runtests/simd-bmask.clif new file mode 100644 index 0000000000..ba504f7868 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bmask.clif @@ -0,0 +1,30 @@ +test interpret + + +function %bmask_i8x16(b8x16) -> i8x16 { +block0(v0: b8x16): + v1 = bmask.i8x16 v0 + return v1 +} +; run: %bmask_i8x16([true false true false true false true false true false true false true false true false]) == [-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0] + +function %bmask_i16x8(b16x8) -> i16x8 { +block0(v0: b16x8): + v1 = bmask.i16x8 v0 + return v1 +} +; run: %bmask_i16x8([true false true false true false true false]) == [-1 0 -1 0 -1 0 -1 0] + +function %bmask_i32x4(b32x4) -> i32x4 { +block0(v0: b32x4): + v1 = bmask.i32x4 v0 + return v1 +} +; run: %bmask_i32x4([true false true false]) == [-1 0 -1 0] + +function %bmask_i64x2(b64x2) -> i64x2 { +block0(v0: b64x2): + v1 = bmask.i64x2 v0 + return v1 +} +; run: %bmask_i64x2([true false]) == [-1 0] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 7fc16b06f7..a64adccdb0 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -767,7 +767,6 @@ where | Opcode::Breduce | Opcode::Bextend | Opcode::Bint - | Opcode::Bmask | Opcode::Ireduce => assign(Value::convert( arg(0)?, ValueConversionKind::Exact(ctrl_ty), @@ -802,6 +801,19 @@ where .collect::>>()?; assign(vectorizelanes(&new_vec, new_type)?) } + Opcode::Bmask => assign({ + let bool = arg(0)?; + let bool_ty = ctrl_ty.as_bool_pedantic(); + if ctrl_ty.is_vector() { + let lanes = extractlanes(&bool, bool_ty.lane_type())? + .into_iter() + .map(|lane| lane.convert(ValueConversionKind::Exact(ctrl_ty.lane_type()))) + .collect::>>()?; + vectorizelanes(&lanes, ctrl_ty)? 
+ } else { + bool.convert(ValueConversionKind::Exact(ctrl_ty))? + } + }), Opcode::Sextend => assign(Value::convert( arg(0)?, ValueConversionKind::SignExtend(ctrl_ty), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index c5727892cc..70f6ac78d6 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -277,11 +277,11 @@ impl Value for DataValue { (DataValue::I64(n), types::I128) => DataValue::I128(n as i128), (DataValue::B(b), t) if t.is_bool() => DataValue::B(b), (DataValue::B(b), t) if t.is_int() => { - let val = if b { - // Bools are represented in memory as all 1's - (1i128 << t.bits()) - 1 - } else { - 0 + // Bools are represented in memory as all 1's + let val = match (b, t) { + (true, types::I128) => -1, + (true, t) => (1i128 << t.bits()) - 1, + _ => 0, }; DataValue::int(val, t)? } From 38463d11ed19b0bce27010c518787737c74cebeb Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 21 Sep 2021 13:05:31 -0500 Subject: [PATCH 24/93] Load generated trampolines into jitdump when profiling (#3344) * Load generated trampolines into jitdump when profiling This commit updates the jitdump profiler to generate JIT profiling records for generated trampolines in a wasm module in addition to the functions already in a module. It's also updated to learn about trampolines generated via `Func::new` and friends. These trampolines were all not previously registered meaning that stack traces with these pc values would be confusing to see in the profile output. While the names aren't the best it should at least be more clear than before if a function is hot! 
* Fix more builds --- crates/jit/src/instantiate.rs | 14 +++-- crates/jit/src/profiling.rs | 8 ++- crates/jit/src/profiling/jitdump_disabled.rs | 1 + crates/jit/src/profiling/jitdump_linux.rs | 54 +++++++++++++++++++- crates/jit/src/profiling/vtune_disabled.rs | 1 + crates/jit/src/profiling/vtune_linux.rs | 3 ++ crates/wasmtime/src/module.rs | 4 +- crates/wasmtime/src/trampoline/func.rs | 1 + 8 files changed, 76 insertions(+), 10 deletions(-) diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index ef499ad446..ebe823b51b 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -397,13 +397,17 @@ impl CompiledModule { } /// Returns the per-signature trampolines for this module. - pub fn trampolines(&self) -> impl Iterator + '_ { + pub fn trampolines(&self) -> impl Iterator + '_ { let code = self.code(); self.trampolines.iter().map(move |info| { - (info.signature, unsafe { - let ptr = &code[info.start as usize]; - std::mem::transmute::<*const u8, VMTrampoline>(ptr) - }) + ( + info.signature, + unsafe { + let ptr = &code[info.start as usize]; + std::mem::transmute::<*const u8, VMTrampoline>(ptr) + }, + info.length as usize, + ) }) } diff --git a/crates/jit/src/profiling.rs b/crates/jit/src/profiling.rs index 6e90b6b7e2..7fddcb296f 100644 --- a/crates/jit/src/profiling.rs +++ b/crates/jit/src/profiling.rs @@ -29,7 +29,10 @@ pub use vtune::VTuneAgent; /// Common interface for profiling tools. pub trait ProfilingAgent: Send + Sync + 'static { /// Notify the profiler of a new module loaded into memory - fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) -> (); + fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>); + /// Notify the profiler that the object file provided contains + /// dynamically-generated trampolines which are now being loaded. + fn trampoline_load(&self, file: &object::File<'_>); } /// Default agent for unsupported profiling build. 
@@ -54,7 +57,8 @@ impl Error for NullProfilerAgentError { } impl ProfilingAgent for NullProfilerAgent { - fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) -> () {} + fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } #[allow(dead_code)] diff --git a/crates/jit/src/profiling/jitdump_disabled.rs b/crates/jit/src/profiling/jitdump_disabled.rs index 122468d203..a007a72661 100644 --- a/crates/jit/src/profiling/jitdump_disabled.rs +++ b/crates/jit/src/profiling/jitdump_disabled.rs @@ -20,4 +20,5 @@ impl JitDumpAgent { impl ProfilingAgent for JitDumpAgent { fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs index 9680ce8984..4663011468 100644 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ b/crates/jit/src/profiling/jitdump_linux.rs @@ -21,6 +21,7 @@ use std::ptr; use std::sync::Mutex; use std::{borrow, mem, process}; use target_lexicon::Architecture; +use wasmtime_environ::EntityRef; use object::elf; @@ -202,6 +203,9 @@ impl ProfilingAgent for JitDumpAgent { fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { self.state.lock().unwrap().module_load(module, dbg_image); } + fn trampoline_load(&self, file: &object::File<'_>) { + self.state.lock().unwrap().trampoline_load(file) + } } impl State { @@ -280,7 +284,7 @@ impl State { } /// Sent when a method is compiled and loaded into memory by the VM. - pub fn module_load(&mut self, module: &CompiledModule, dbg_image: Option<&[u8]>) -> () { + pub fn module_load(&mut self, module: &CompiledModule, dbg_image: Option<&[u8]>) { let pid = process::id(); let tid = pid; // ThreadId does appear to track underlying thread. Using PID. 
@@ -299,6 +303,52 @@ impl State { self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); } } + for (idx, func, len) in module.trampolines() { + let (addr, len) = (func as usize as *const u8, len); + let timestamp = self.get_time_stamp(); + let name = format!("wasm::trampoline[{}]", idx.index()); + self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); + } + } + + fn trampoline_load(&mut self, image: &object::File<'_>) { + use object::{ObjectSection, ObjectSymbol, SectionKind, SymbolKind}; + let pid = process::id(); + let tid = pid; + + let text_base = match image.sections().find(|s| s.kind() == SectionKind::Text) { + Some(section) => match section.data() { + Ok(data) => data.as_ptr() as usize, + Err(_) => return, + }, + None => return, + }; + + for sym in image.symbols() { + if !sym.is_definition() { + continue; + } + if sym.kind() != SymbolKind::Text { + continue; + } + let address = sym.address(); + let size = sym.size(); + if address == 0 || size == 0 { + continue; + } + if let Ok(name) = sym.name() { + let addr = text_base + address as usize; + let timestamp = self.get_time_stamp(); + self.dump_code_load_record( + &name, + addr as *const u8, + size as usize, + timestamp, + pid, + tid, + ); + } + } } fn dump_code_load_record( @@ -309,7 +359,7 @@ impl State { timestamp: u64, pid: u32, tid: u32, - ) -> () { + ) { let name_len = method_name.len() + 1; let size_limit = mem::size_of::(); diff --git a/crates/jit/src/profiling/vtune_disabled.rs b/crates/jit/src/profiling/vtune_disabled.rs index 9dd943944f..7a84fd03d7 100644 --- a/crates/jit/src/profiling/vtune_disabled.rs +++ b/crates/jit/src/profiling/vtune_disabled.rs @@ -20,4 +20,5 @@ impl VTuneAgent { impl ProfilingAgent for VTuneAgent { fn module_load(&self, _module: &crate::CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } diff --git a/crates/jit/src/profiling/vtune_linux.rs b/crates/jit/src/profiling/vtune_linux.rs index 
bc51cb58f1..eee43ee69a 100644 --- a/crates/jit/src/profiling/vtune_linux.rs +++ b/crates/jit/src/profiling/vtune_linux.rs @@ -112,6 +112,9 @@ impl ProfilingAgent for VTuneAgent { fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { self.state.lock().unwrap().module_load(module, dbg_image); } + fn trampoline_load(&self, _file: &object::File<'_>) { + // TODO: needs an implementation + } } impl State { diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 677b094baa..eec7533a85 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -500,7 +500,9 @@ impl Module { let signatures = Arc::new(SignatureCollection::new_for_module( engine.signatures(), &types.wasm_signatures, - modules.iter().flat_map(|m| m.trampolines()), + modules + .iter() + .flat_map(|m| m.trampolines().map(|(idx, f, _)| (idx, f))), )); let module = modules.remove(main_module); diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index e215dfe8f2..3c7e8c84c6 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -86,6 +86,7 @@ where // also take care of unwind table registration. let mut code_memory = CodeMemory::new(obj); let code = code_memory.publish()?; + engine.config().profiler.trampoline_load(&code.obj); // Extract the host/wasm trampolines from the results of compilation since // we know their start/length. From bcf3544924880edc085e74497b0d3932909a456e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 21 Sep 2021 14:07:05 -0500 Subject: [PATCH 25/93] Optimize `Func::call` and its C API (#3319) * Optimize `Func::call` and its C API This commit is an alternative to #3298 which achieves effectively the same goal of optimizing the `Func::call` API as well as its C API sibling of `wasmtime_func_call`. The strategy taken here is different than #3298 though where a new API isn't created, rather a small tweak to an existing API is done. 
Specifically this commit handles the major sources of slowness with `Func::call` with: * Looking up the type of a function, to typecheck the arguments with and use to guide how the results should be loaded, no longer hits the rwlock in the `Engine` but instead each `Func` contains its own `FuncType`. This can be an unnecessary allocation for funcs not used with `Func::call`, so this is a downside of this implementation relative to #3298. A mitigating factor, though, is that instance exports are loaded lazily into the `Store` and in theory not too many funcs are active in the store as `Func` objects. * Temporary storage is amortized with a long-lived `Vec` in the `Store` rather than allocating a new vector on each call. This is basically the same strategy as #3294 only applied to different types in different places. Specifically `wasmtime::Store` now retains a `Vec` for `Func::call`, and the C API retains a `Vec` for calling `Func::call`. * Finally, an API breaking change is made to `Func::call` and its type signature (as well as `Func::call_async`). Instead of returning `Box<[Val]>` as it did before this function now takes a `results: &mut [Val]` parameter. This allows the caller to manage the allocation and we can amortize-remove it in `wasmtime_func_call` by using space after the parameters in the `Vec` we're passing in. This change is naturally a breaking change and we'll want to consider it carefully, but mitigating factors are that most embeddings are likely using `TypedFunc::call` instead and this signature taking a mutable slice better aligns with `Func::new` which receives a mutable slice for the results. Overall this change, in the benchmark of "call a nop function from the C API" is not quite as good as #3298. It's still a bit slower, on the order of 15ns, because there's lots of capacity checks around vectors and the type checks are slightly less optimized than before. 
Overall though this is still significantly better than today because allocations and the rwlock to acquire the type information are both avoided. I personally feel that this change is the best to do because it has less of an API impact than #3298. * Rebase issues --- crates/c-api/src/func.rs | 64 ++++--- crates/c-api/src/store.rs | 9 +- crates/fuzzing/src/oracles.rs | 27 +-- crates/fuzzing/src/oracles/v8.rs | 16 +- crates/wasmtime/src/func.rs | 183 ++++++++++++-------- crates/wasmtime/src/lib.rs | 2 +- crates/wasmtime/src/linker.rs | 21 +-- crates/wasmtime/src/store.rs | 22 +++ crates/wasmtime/src/types.rs | 2 + crates/wast/src/wast.rs | 6 +- crates/wiggle/tests/wasmtime_async.rs | 18 +- crates/wiggle/tests/wasmtime_integration.rs | 18 +- crates/wiggle/tests/wasmtime_sync.rs | 24 ++- src/commands/run.rs | 5 +- tests/all/async_functions.rs | 20 ++- tests/all/call_hook.rs | 9 +- tests/all/externals.rs | 20 ++- tests/all/func.rs | 62 ++++--- tests/all/funcref.rs | 30 ++-- tests/all/gc.rs | 12 +- tests/all/host_funcs.rs | 53 +++--- tests/all/import_calling_export.rs | 7 +- tests/all/invoke_func_via_table.rs | 5 +- tests/all/linker.rs | 2 +- tests/all/traps.rs | 11 +- tests/host_segfault.rs | 4 +- 26 files changed, 399 insertions(+), 253 deletions(-) diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index 7e6a2e8c2d..fe875d951f 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -3,7 +3,6 @@ use crate::{ wasm_extern_t, wasm_functype_t, wasm_store_t, wasm_val_t, wasm_val_vec_t, wasmtime_error_t, wasmtime_extern_t, wasmtime_val_t, wasmtime_val_union, CStoreContext, CStoreContextMut, }; -use anyhow::anyhow; use std::ffi::c_void; use std::mem::{self, MaybeUninit}; use std::panic::{self, AssertUnwindSafe}; @@ -109,6 +108,22 @@ pub unsafe extern "C" fn wasm_func_new_with_env( }) } +/// Places the `args` into `dst` and additionally reserves space in `dst` for `results_size` +/// returns. The params/results slices are then returned separately. 
+fn translate_args<'a>( + dst: &'a mut Vec, + args: impl ExactSizeIterator, + results_size: usize, +) -> (&'a [Val], &'a mut [Val]) { + debug_assert!(dst.is_empty()); + let num_args = args.len(); + dst.reserve(args.len() + results_size); + dst.extend(args); + dst.extend((0..results_size).map(|_| Val::null())); + let (a, b) = dst.split_at_mut(num_args); + (a, b) +} + #[no_mangle] pub unsafe extern "C" fn wasm_func_call( func: &mut wasm_func_t, @@ -118,23 +133,20 @@ pub unsafe extern "C" fn wasm_func_call( let f = func.func(); let results = (*results).as_uninit_slice(); let args = (*args).as_slice(); - if results.len() != f.ty(func.ext.store.context()).results().len() { - return Box::into_raw(Box::new(wasm_trap_t::new( - anyhow!("wrong number of results provided").into(), - ))); - } - let params = args.iter().map(|i| i.val()).collect::>(); + let mut dst = Vec::new(); + let (wt_params, wt_results) = + translate_args(&mut dst, args.iter().map(|i| i.val()), results.len()); // We're calling arbitrary code here most of the time, and we in general // want to try to insulate callers against bugs in wasmtime/wasi/etc if we // can. As a result we catch panics here and transform them to traps to // allow the caller to have any insulation possible against Rust panics. 
let result = panic::catch_unwind(AssertUnwindSafe(|| { - f.call(func.ext.store.context_mut(), &params) + f.call(func.ext.store.context_mut(), wt_params, wt_results) })); match result { - Ok(Ok(out)) => { - for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) { + Ok(Ok(())) => { + for (slot, val) in results.iter_mut().zip(wt_results.iter().cloned()) { crate::initialize(slot, wasm_val_t::from_val(val)); } ptr::null_mut() @@ -261,7 +273,7 @@ pub(crate) unsafe fn c_callback_to_rust_fn( #[no_mangle] pub unsafe extern "C" fn wasmtime_func_call( - store: CStoreContextMut<'_>, + mut store: CStoreContextMut<'_>, func: &Func, args: *const wasmtime_val_t, nargs: usize, @@ -269,27 +281,31 @@ pub unsafe extern "C" fn wasmtime_func_call( nresults: usize, trap_ret: &mut *mut wasm_trap_t, ) -> Option> { - if nresults != func.ty(&store).results().len() { - return Some(Box::new(wasmtime_error_t::from(anyhow!( - "wrong number of results provided" - )))); - } - let params = crate::slice_from_raw_parts(args, nargs) - .iter() - .map(|i| i.to_val()) - .collect::>(); + let mut store = store.as_context_mut(); + let mut params = mem::take(&mut store.data_mut().wasm_val_storage); + let (wt_params, wt_results) = translate_args( + &mut params, + crate::slice_from_raw_parts(args, nargs) + .iter() + .map(|i| i.to_val()), + nresults, + ); // We're calling arbitrary code here most of the time, and we in general // want to try to insulate callers against bugs in wasmtime/wasi/etc if we // can. As a result we catch panics here and transform them to traps to // allow the caller to have any insulation possible against Rust panics.
- let result = panic::catch_unwind(AssertUnwindSafe(|| func.call(store, &params))); + let result = panic::catch_unwind(AssertUnwindSafe(|| { + func.call(&mut store, wt_params, wt_results) + })); match result { - Ok(Ok(out)) => { + Ok(Ok(())) => { let results = crate::slice_from_raw_parts_mut(results, nresults); - for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) { - crate::initialize(slot, wasmtime_val_t::from_val(val)); + for (slot, val) in results.iter_mut().zip(wt_results.iter()) { + crate::initialize(slot, wasmtime_val_t::from_val(val.clone())); } + params.truncate(0); + store.data_mut().wasm_val_storage = params; None } Ok(Err(trap)) => match trap.downcast::() { diff --git a/crates/c-api/src/store.rs b/crates/c-api/src/store.rs index ea8994ddc9..cdee5969ba 100644 --- a/crates/c-api/src/store.rs +++ b/crates/c-api/src/store.rs @@ -2,7 +2,9 @@ use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData}; use std::cell::UnsafeCell; use std::ffi::c_void; use std::sync::Arc; -use wasmtime::{AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut}; +use wasmtime::{ + AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut, Val, +}; /// This representation of a `Store` is used to implement the `wasm.h` API. /// @@ -71,6 +73,10 @@ pub struct StoreData { /// Temporary storage for usage during a wasm->host call to store values /// in a slice we pass to the C API. pub hostcall_val_storage: Vec, + + /// Temporary storage for usage during host->wasm calls, same as above but + /// for a different direction.
+ pub wasm_val_storage: Vec, } #[no_mangle] @@ -90,6 +96,7 @@ pub extern "C" fn wasmtime_store_new( #[cfg(feature = "wasi")] wasi: None, hostcall_val_storage: Vec::new(), + wasm_val_storage: Vec::new(), }, ), }) diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index ff517e9782..467aa6955e 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -297,8 +297,10 @@ pub fn differential_execution( let ty = f.ty(&store); let params = dummy::dummy_values(ty.params()); + let mut results = vec![Val::I32(0); ty.results().len()]; let this_result = f - .call(&mut store, &params) + .call(&mut store, &params, &mut results) + .map(|()| results.into()) .map_err(|e| e.downcast::().unwrap()); let existing_result = export_func_results @@ -312,7 +314,7 @@ pub fn differential_execution( match instance.get_export(&mut *store, "hangLimitInitializer") { None => return, Some(Extern::Func(f)) => { - f.call(store, &[]) + f.call(store, &[], &mut []) .expect("initializing the hang limit should not fail"); } Some(_) => panic!("unexpected hangLimitInitializer export"), @@ -478,7 +480,8 @@ pub fn make_api_calls(api: crate::generators::api::ApiCalls) { let f = &funcs[nth]; let ty = f.ty(&store); let params = dummy::dummy_values(ty.params()); - let _ = f.call(store, &params); + let mut results = vec![Val::I32(0); ty.results().len()]; + let _ = f.call(store, &params, &mut results); } } } @@ -636,7 +639,7 @@ pub fn table_ops( let args: Vec<_> = (0..ops.num_params()) .map(|_| Val::ExternRef(Some(ExternRef::new(CountDrops(num_dropped.clone()))))) .collect(); - let _ = run.call(&mut store, &args); + let _ = run.call(&mut store, &args, &mut []); } assert_eq!(num_dropped.load(SeqCst), expected_drops.load(SeqCst)); @@ -740,7 +743,7 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con // Introspect wasmtime module to find name of an exported function and of an // exported memory.
- let (func_name, _ty) = first_exported_function(&wasmtime_module)?; + let (func_name, ty) = first_exported_function(&wasmtime_module)?; let memory_name = first_exported_memory(&wasmtime_module)?; let wasmi_mem_export = wasmi_instance.export_by_name(memory_name).unwrap(); @@ -755,8 +758,10 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, func_name) .expect("function export is present"); - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &[]); - let wasmtime_val = wasmtime_vals.map(|v| v.iter().next().cloned()); + let mut wasmtime_results = vec![Val::I32(0); ty.results().len()]; + let wasmtime_val = wasmtime_main + .call(&mut wasmtime_store, &[], &mut wasmtime_results) + .map(|()| wasmtime_results.get(0).cloned()); debug!( "Successful execution: wasmi returned {:?}, wasmtime returned {:?}", @@ -918,15 +923,17 @@ fn run_in_wasmtime( .context("Wasmtime cannot instantiate module")?; // Find the first exported function. - let (func_name, _ty) = + let (func_name, ty) = first_exported_function(&wasmtime_module).context("Cannot find exported function")?; let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, &func_name[..]) .expect("function export is present"); // Execute the function and return the values. - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, params); - wasmtime_vals.map(|v| v.to_vec()) + let mut results = vec![Val::I32(0); ty.results().len()]; + wasmtime_main + .call(&mut wasmtime_store, params, &mut results) + .map(|()| results) } // Introspect wasmtime module to find the name of the first exported function. 
diff --git a/crates/fuzzing/src/oracles/v8.rs b/crates/fuzzing/src/oracles/v8.rs index 0e66c38cb9..ff16757c00 100644 --- a/crates/fuzzing/src/oracles/v8.rs +++ b/crates/fuzzing/src/oracles/v8.rs @@ -93,7 +93,9 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, func) .expect("function export is present"); - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &wasmtime_params); + let mut wasmtime_vals = vec![Val::I32(0); ty.results().len()]; + let wasmtime_result = + wasmtime_main.call(&mut wasmtime_store, &wasmtime_params, &mut wasmtime_vals); log::trace!("finished wasmtime invocation"); // V8: call the first exported func @@ -112,15 +114,15 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config log::trace!("finished v8 invocation"); // Verify V8 and wasmtime match - match (wasmtime_vals, v8_vals) { - (Ok(wasmtime), Ok(v8)) => { + match (wasmtime_result, v8_vals) { + (Ok(()), Ok(v8)) => { log::trace!("both executed successfully"); - match wasmtime.len() { + match wasmtime_vals.len() { 0 => assert!(v8.is_undefined()), - 1 => assert_val_match(&wasmtime[0], &v8, &mut scope), + 1 => assert_val_match(&wasmtime_vals[0], &v8, &mut scope), _ => { let array = v8::Local::<'_, v8::Array>::try_from(v8).unwrap(); - for (i, wasmtime) in wasmtime.iter().enumerate() { + for (i, wasmtime) in wasmtime_vals.iter().enumerate() { let v8 = array.get_index(&mut scope, i as u32).unwrap(); assert_val_match(wasmtime, &v8, &mut scope); // .. 
@@ -128,7 +130,7 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config } } } - (Ok(_), Err(msg)) => { + (Ok(()), Err(msg)) => { panic!("wasmtime succeeded at invocation, v8 failed: {}", msg) } (Err(err), Ok(_)) => { diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index dbc8feba6c..abadbdb341 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -4,7 +4,6 @@ use crate::{ StoreContext, StoreContextMut, Trap, Val, ValType, }; use anyhow::{bail, Context as _, Result}; -use std::cmp::max; use std::error::Error; use std::fmt; use std::future::Future; @@ -90,13 +89,13 @@ use wasmtime_runtime::{ /// /// // Work with `foo` as a `Func` at this point, such as calling it /// // dynamically... -/// match foo.call(&mut store, &[]) { -/// Ok(result) => { /* ... */ } +/// match foo.call(&mut store, &[], &mut []) { +/// Ok(()) => { /* ... */ } /// Err(trap) => { /// panic!("execution of `foo` resulted in a wasm trap: {}", trap); /// } /// } -/// foo.call(&mut store, &[])?; +/// foo.call(&mut store, &[], &mut [])?; /// /// // ... or we can make a static assertion about its signature and call it. /// // Our first call here can fail if the signatures don't match, and then the @@ -184,9 +183,14 @@ use wasmtime_runtime::{ #[repr(transparent)] // here for the C API pub struct Func(Stored); +pub(crate) struct FuncData { + kind: FuncKind, + ty: FuncType, +} + /// The three ways that a function can be created and referenced from within a /// store. -pub(crate) enum FuncData { +enum FuncKind { /// A function already owned by the store via some other means. This is /// used, for example, when creating a `Func` from an instance's exported /// function. The instance's `InstanceHandle` is already owned by the store @@ -647,42 +651,37 @@ impl Func { /// /// Panics if `store` does not own this function. 
pub fn ty(&self, store: impl AsContext) -> FuncType { - // Signatures should always be registered in the engine's registry of - // shared signatures, so we should be able to unwrap safely here. - let store = store.as_context(); - let sig_index = unsafe { store[self.0].export().anyfunc.as_ref().type_index }; - FuncType::from_wasm_func_type( - store - .engine() - .signatures() - .lookup_type(sig_index) - .expect("signature should be registered"), - ) + store.as_context()[self.0].ty.clone() } pub(crate) fn sig_index(&self, data: &StoreData) -> VMSharedSignatureIndex { unsafe { data[self.0].export().anyfunc.as_ref().type_index } } - /// Invokes this function with the `params` given, returning the results and - /// any trap, if one occurs. + /// Invokes this function with the `params` given and writes returned values + /// to `results`. /// /// The `params` here must match the type signature of this `Func`, or a /// trap will occur. If a trap occurs while executing this function, then a - /// trap will also be returned. + /// trap will also be returned. Additionally `results` must have the same + /// length as the number of results for this function. /// /// # Panics /// /// This function will panic if called on a function belonging to an async /// store. Asynchronous stores must always use `call_async`. /// initiates a panic. Also panics if `store` does not own this function. 
- pub fn call(&self, mut store: impl AsContextMut, params: &[Val]) -> Result> { + pub fn call( + &self, + mut store: impl AsContextMut, + params: &[Val], + results: &mut [Val], + ) -> Result<()> { assert!( !store.as_context().async_support(), "must use `call_async` when async support is enabled on the config", ); - let my_ty = self.ty(&store); - self.call_impl(&mut store.as_context_mut(), my_ty, params) + self.call_impl(&mut store.as_context_mut(), params, results) } /// Invokes this function with the `params` given, returning the results @@ -713,7 +712,8 @@ impl Func { &self, mut store: impl AsContextMut, params: &[Val], - ) -> Result> + results: &mut [Val], + ) -> Result<()> where T: Send, { @@ -722,9 +722,8 @@ impl Func { store.0.async_support(), "cannot use `call_async` without enabling async support in the config", ); - let my_ty = self.ty(&store); let result = store - .on_fiber(|store| self.call_impl(store, my_ty, params)) + .on_fiber(|store| self.call_impl(store, params, results)) .await??; Ok(result) } @@ -732,10 +731,43 @@ impl Func { fn call_impl( &self, store: &mut StoreContextMut<'_, T>, - my_ty: FuncType, params: &[Val], - ) -> Result> { - let mut values_vec = write_params(store.0, &my_ty, params)?; + results: &mut [Val], + ) -> Result<()> { + // We need to perform a dynamic check that the arguments given to us + // match the signature of this function and are appropriate to pass to + // this function. This involves checking to make sure we have the right + // number and types of arguments as well as making sure everything is + // from the same `Store`. 
+ let ty = &store[self.0].ty; + if ty.params().len() != params.len() { + bail!( + "expected {} arguments, got {}", + ty.params().len(), + params.len() + ); + } + if ty.results().len() != results.len() { + bail!( + "expected {} results, got {}", + ty.results().len(), + results.len() + ); + } + for (ty, arg) in ty.params().zip(params) { + if arg.ty() != ty { + bail!( + "argument type mismatch: found {} but expected {}", + arg.ty(), + ty + ); + } + if !arg.comes_from_same_store(store.0) { + bail!("cross-`Store` values are not currently supported"); + } + } + + let mut values_vec = write_params(store.0, params, results)?; // Call the trampoline. unsafe { @@ -752,27 +784,19 @@ impl Func { })?; } - return Ok(read_results(store.0, &my_ty, &values_vec)); + read_results(store.0, self, values_vec, results); + return Ok(()); fn write_params( store: &mut StoreOpaque, - ty: &FuncType, params: &[Val], + results: &mut [Val], ) -> Result> { - // We need to perform a dynamic check that the arguments given to us - // match the signature of this function and are appropriate to pass to - // this function. This involves checking to make sure we have the right - // number and types of arguments as well as making sure everything is - // from the same `Store`. - if ty.params().len() != params.len() { - bail!( - "expected {} arguments, got {}", - ty.params().len(), - params.len() - ); - } + let values_vec_size = params.len().max(results.len()); - let mut values_vec = vec![0; max(params.len(), ty.results().len())]; + let mut values_vec = store.take_wasm_u128_storage(); + debug_assert!(values_vec.is_empty()); + values_vec.extend((0..values_vec_size).map(|_| 0)); // Whenever we pass `externref`s from host code to Wasm code, they // go into the `VMExternRefActivationsTable`. But the table might be @@ -790,18 +814,7 @@ impl Func { } // Store the argument values into `values_vec`. 
- let param_tys = ty.params(); - for ((arg, slot), ty) in params.iter().cloned().zip(&mut values_vec).zip(param_tys) { - if arg.ty() != ty { - bail!( - "argument type mismatch: found {} but expected {}", - arg.ty(), - ty - ); - } - if !arg.comes_from_same_store(store) { - bail!("cross-`Store` values are not currently supported"); - } + for (arg, slot) in params.iter().zip(&mut values_vec) { unsafe { arg.write_value_without_gc(store, slot); } @@ -810,15 +823,20 @@ impl Func { Ok(values_vec) } - fn read_results(store: &mut StoreOpaque, ty: &FuncType, values_vec: &[u128]) -> Box<[Val]> { - let mut results = Vec::with_capacity(ty.results().len()); - for (index, ty) in ty.results().enumerate() { + fn read_results( + store: &mut StoreOpaque, + func: &Func, + mut values_vec: Vec, + results: &mut [Val], + ) { + for (i, (ptr, dst)) in values_vec.iter().zip(results).enumerate() { + let ty = store[func.0].ty.results().nth(i).unwrap(); unsafe { - let ptr = &values_vec[index]; - results.push(Val::read_value_from(store, ptr, ty)); + *dst = Val::read_value_from(store, ptr, ty); } } - results.into() + values_vec.truncate(0); + store.save_wasm_u128_storage(values_vec); } } @@ -836,8 +854,21 @@ impl Func { ) -> Self { let anyfunc = export.anyfunc.as_ref(); let trampoline = store.lookup_trampoline(&*anyfunc); - let data = FuncData::StoreOwned { trampoline, export }; - Func(store.store_data_mut().insert(data)) + Func::from_func_kind(FuncKind::StoreOwned { trampoline, export }, store) + } + + fn from_func_kind(kind: FuncKind, store: &mut StoreOpaque) -> Self { + // Signatures should always be registered in the engine's registry of + // shared signatures, so we should be able to unwrap safely here. 
+ let ty = unsafe { kind.export().anyfunc.as_ref().type_index }; + let ty = FuncType::from_wasm_func_type( + store + .engine() + .signatures() + .lookup_type(ty) + .expect("signature should be registered"), + ); + Func(store.store_data_mut().insert(FuncData { kind, ty })) } pub(crate) fn vmimport(&self, store: &mut StoreOpaque) -> VMFunctionImport { @@ -1972,13 +2003,13 @@ impl HostFunc { pub unsafe fn to_func(self: &Arc, store: &mut StoreOpaque) -> Func { self.register_trampoline(store); let me = self.clone(); - Func(store.store_data_mut().insert(FuncData::SharedHost(me))) + Func::from_func_kind(FuncKind::SharedHost(me), store) } /// Same as [`HostFunc::to_func`], different ownership. unsafe fn into_func(self, store: &mut StoreOpaque) -> Func { self.register_trampoline(store); - Func(store.store_data_mut().insert(FuncData::Host(self))) + Func::from_func_kind(FuncKind::Host(self), store) } unsafe fn register_trampoline(&self, store: &mut StoreOpaque) { @@ -2014,20 +2045,28 @@ impl Drop for HostFunc { } impl FuncData { + #[inline] fn trampoline(&self) -> VMTrampoline { - match self { - FuncData::StoreOwned { trampoline, .. } => *trampoline, - FuncData::SharedHost(host) => host.trampoline, - FuncData::Host(host) => host.trampoline, + match &self.kind { + FuncKind::StoreOwned { trampoline, .. } => *trampoline, + FuncKind::SharedHost(host) => host.trampoline, + FuncKind::Host(host) => host.trampoline, } } + #[inline] + fn export(&self) -> &ExportFunction { + self.kind.export() + } +} + +impl FuncKind { #[inline] fn export(&self) -> &ExportFunction { match self { - FuncData::StoreOwned { export, .. } => export, - FuncData::SharedHost(host) => &host.export, - FuncData::Host(host) => &host.export, + FuncKind::StoreOwned { export, .. 
} => export, + FuncKind::SharedHost(host) => &host.export, + FuncKind::Host(host) => &host.export, } } } diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index 64b2bf6d53..7b6a096868 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -440,7 +440,7 @@ fn _assert_send_sync() { #[cfg(feature = "async")] fn _call_async(s: &mut Store<()>, f: Func) { - _assert_send(f.call_async(&mut *s, &[])) + _assert_send(f.call_async(&mut *s, &[], &mut [])) } #[cfg(feature = "async")] fn _typed_call_async(s: &mut Store<()>, f: TypedFunc<(), ()>) { diff --git a/crates/wasmtime/src/linker.rs b/crates/wasmtime/src/linker.rs index c3d8940c31..a84b497959 100644 --- a/crates/wasmtime/src/linker.rs +++ b/crates/wasmtime/src/linker.rs @@ -645,21 +645,14 @@ impl Linker { // `unwrap()` everything here because we know the instance contains a // function export with the given name and signature because we're // iterating over the module it was instantiated from. - let command_results = instance + instance .get_export(&mut caller, &export_name) .unwrap() .into_func() .unwrap() - .call(&mut caller, params) + .call(&mut caller, params, results) .map_err(|error| error.downcast::().unwrap())?; - // Copy the return values into the output slice. 
- for (result, command_result) in - results.iter_mut().zip(command_results.into_vec()) - { - *result = command_result; - } - Ok(()) }, ) @@ -718,20 +711,14 @@ impl Linker { let (instance_pre, export_name) = &*upvars; let instance = instance_pre.instantiate_async(&mut caller).await?; - let command_results = instance + instance .get_export(&mut caller, &export_name) .unwrap() .into_func() .unwrap() - .call_async(&mut caller, params) + .call_async(&mut caller, params, results) .await .map_err(|error| error.downcast::().unwrap())?; - - for (result, command_result) in - results.iter_mut().zip(command_results.into_vec()) - { - *result = command_result; - } Ok(()) }) }, diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 015eb29c5c..e797984bc7 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -274,6 +274,9 @@ pub struct StoreOpaque { /// `Func::new` to avoid allocating a new vector each time a function is /// called. hostcall_val_storage: Vec, + /// Same as `hostcall_val_storage`, but for the direction of the host + /// calling wasm. + wasm_u128_storage: Vec, } #[cfg(feature = "async")] @@ -430,6 +433,7 @@ impl Store { store_data: StoreData::new(), default_callee, hostcall_val_storage: Vec::new(), + wasm_u128_storage: Vec::new(), }, limiter: None, call_hook: None, @@ -1160,6 +1164,7 @@ impl StoreOpaque { /// Takes the cached `Vec` stored internally across hostcalls to get /// used as part of calling the host in a `Func::new` method invocation. + #[inline] pub fn take_hostcall_val_storage(&mut self) -> Vec { mem::take(&mut self.hostcall_val_storage) } @@ -1167,11 +1172,28 @@ impl StoreOpaque { /// Restores the vector previously taken by `take_hostcall_val_storage` /// above back into the store, allowing it to be used in the future for the /// next wasm->host call. 
+ #[inline] pub fn save_hostcall_val_storage(&mut self, storage: Vec) { if storage.capacity() > self.hostcall_val_storage.capacity() { self.hostcall_val_storage = storage; } } + + /// Same as `take_hostcall_val_storage`, but for the direction of the host + /// calling wasm. + #[inline] + pub fn take_wasm_u128_storage(&mut self) -> Vec { + mem::take(&mut self.wasm_u128_storage) + } + + /// Same as `save_hostcall_val_storage`, but for the direction of the host + /// calling wasm. + #[inline] + pub fn save_wasm_u128_storage(&mut self, storage: Vec) { + if storage.capacity() > self.wasm_u128_storage.capacity() { + self.wasm_u128_storage = storage; + } + } } impl StoreContextMut<'_, T> { diff --git a/crates/wasmtime/src/types.rs b/crates/wasmtime/src/types.rs index 479187825c..738cc1c7dd 100644 --- a/crates/wasmtime/src/types.rs +++ b/crates/wasmtime/src/types.rs @@ -238,11 +238,13 @@ impl FuncType { } /// Returns the list of parameter types for this function. + #[inline] pub fn params(&self) -> impl ExactSizeIterator + '_ { self.sig.params().iter().map(ValType::from_wasm_type) } /// Returns the list of result types for this function. + #[inline] pub fn results(&self) -> impl ExactSizeIterator + '_ { self.sig.returns().iter().map(ValType::from_wasm_type) } diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index 0308f3f611..c351f0ae0c 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -163,8 +163,10 @@ impl WastContext { .get_export(instance_name, field)? 
.into_func() .ok_or_else(|| anyhow!("no function named `{}`", field))?; - Ok(match func.call(&mut self.store, args) { - Ok(result) => Outcome::Ok(result.into()), + + let mut results = vec![Val::null(); func.ty(&self.store).results().len()]; + Ok(match func.call(&mut self.store, args, &mut results) { + Ok(()) => Outcome::Ok(results.into()), Err(e) => Outcome::Trap(e.downcast()?), }) } diff --git a/crates/wiggle/tests/wasmtime_async.rs b/crates/wiggle/tests/wasmtime_async.rs index 51a0e4aa82..19209f9fc2 100644 --- a/crates/wiggle/tests/wasmtime_async.rs +++ b/crates/wiggle/tests/wasmtime_async.rs @@ -1,4 +1,4 @@ -use wasmtime::{Config, Engine, Linker, Module, Store}; +use wasmtime::{Config, Engine, Linker, Module, Store, Val}; wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], @@ -42,14 +42,14 @@ async fn test_sync_host_func() { .await .unwrap(); - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "int_float_args_shim") .unwrap() - .call_async(&mut store, &[0i32.into(), 123.45f32.into()]) + .call_async(&mut store, &[0i32.into(), 123.45f32.into()], &mut results) .await .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, @@ -72,14 +72,18 @@ async fn test_async_host_func() { let input: i32 = 123; let result_location: i32 = 0; - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "double_int_return_float_shim") .unwrap() - .call_async(&mut store, &[input.into(), result_location.into()]) + .call_async( + &mut store, + &[input.into(), result_location.into()], + &mut results, + ) .await .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, diff --git a/crates/wiggle/tests/wasmtime_integration.rs b/crates/wiggle/tests/wasmtime_integration.rs index 756d8ca8f6..8617c6d757 100644 --- a/crates/wiggle/tests/wasmtime_integration.rs +++ 
b/crates/wiggle/tests/wasmtime_integration.rs @@ -1,4 +1,4 @@ -use wasmtime::{Engine, Linker, Module, Store}; +use wasmtime::{Engine, Linker, Module, Store, Val}; // from_witx invocation says the func is async. This context doesn't support async! wiggle::from_witx!({ @@ -49,13 +49,13 @@ fn test_sync_host_func() { let shim_mod = shim_module(&engine); let shim_inst = linker.instantiate(&mut store, &shim_mod).unwrap(); - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "int_float_args_shim") .unwrap() - .call(&mut store, &[0i32.into(), 123.45f32.into()]) + .call(&mut store, &[0i32.into(), 123.45f32.into()], &mut results) .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, @@ -76,13 +76,17 @@ fn test_async_host_func() { let input: i32 = 123; let result_location: i32 = 0; - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "double_int_return_float_shim") .unwrap() - .call(&mut store, &[input.into(), result_location.into()]) + .call( + &mut store, + &[input.into(), result_location.into()], + &mut results, + ) .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, diff --git a/crates/wiggle/tests/wasmtime_sync.rs b/crates/wiggle/tests/wasmtime_sync.rs index 8332b19a55..3410e530ea 100644 --- a/crates/wiggle/tests/wasmtime_sync.rs +++ b/crates/wiggle/tests/wasmtime_sync.rs @@ -1,4 +1,4 @@ -use wasmtime::{Engine, Linker, Module, Store}; +use wasmtime::{Engine, Linker, Module, Store, Val}; wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], @@ -55,13 +55,13 @@ fn test_sync_host_func() { let shim_mod = shim_module(&engine); let shim_inst = linker.instantiate(&mut store, &shim_mod).unwrap(); - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "int_float_args_shim") .unwrap() - .call(&mut 
store, &[0i32.into(), 123.45f32.into()]) + .call(&mut store, &[0i32.into(), 123.45f32.into()], &mut results) .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, @@ -82,13 +82,17 @@ fn test_async_host_func() { let input: i32 = 123; let result_location: i32 = 0; - let results = shim_inst + let mut results = [Val::I32(0)]; + shim_inst .get_func(&mut store, "double_int_return_float_shim") .unwrap() - .call(&mut store, &[input.into(), result_location.into()]) + .call( + &mut store, + &[input.into(), result_location.into()], + &mut results, + ) .unwrap(); - assert_eq!(results.len(), 1, "one return value"); assert_eq!( results[0].unwrap_i32(), types::Errno::Ok as i32, @@ -121,7 +125,11 @@ fn test_async_host_func_pending() { let trap = shim_inst .get_func(&mut store, "double_int_return_float_shim") .unwrap() - .call(&mut store, &[input.into(), result_location.into()]) + .call( + &mut store, + &[input.into(), result_location.into()], + &mut [Val::I32(0)], + ) .unwrap_err(); assert!( format!("{}", trap).contains("Cannot wait on pending future"), diff --git a/src/commands/run.rs b/src/commands/run.rs index ab01807cc5..c7877d10d1 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -343,7 +343,8 @@ impl RunCommand { // Invoke the function and then afterwards print all the results that came // out, if there are any. 
- let results = func.call(store, &values).with_context(|| { + let mut results = vec![Val::null(); ty.results().len()]; + func.call(store, &values, &mut results).with_context(|| { if let Some(name) = name { format!("failed to invoke `{}`", name) } else { @@ -357,7 +358,7 @@ impl RunCommand { ); } - for result in results.into_vec() { + for result in results { match result { Val::I32(i) => println!("{}", i), Val::I64(i) => println!("{}", i), diff --git a/tests/all/async_functions.rs b/tests/all/async_functions.rs index 20349c5a58..584501ded3 100644 --- a/tests/all/async_functions.rs +++ b/tests/all/async_functions.rs @@ -9,8 +9,8 @@ fn async_store() -> Store<()> { } fn run_smoke_test(store: &mut Store<()>, func: Func) { - run(func.call_async(&mut *store, &[])).unwrap(); - run(func.call_async(&mut *store, &[])).unwrap(); + run(func.call_async(&mut *store, &[], &mut [])).unwrap(); + run(func.call_async(&mut *store, &[], &mut [])).unwrap(); } fn run_smoke_typed_test(store: &mut Store<()>, func: Func) { @@ -159,7 +159,9 @@ fn recursive_call() { FuncType::new(None, None), move |mut caller, _params, _results| { Box::new(async move { - async_wasm_func.call_async(&mut caller, &[]).await?; + async_wasm_func + .call_async(&mut caller, &[], &mut []) + .await?; Ok(()) }) }, @@ -184,7 +186,7 @@ fn recursive_call() { run(async { let instance = Instance::new_async(&mut store, &module, &[func2.into()]).await?; let func = instance.get_func(&mut store, "").unwrap(); - func.call_async(&mut store, &[]).await + func.call_async(&mut store, &[], &mut []).await }) .unwrap(); } @@ -209,7 +211,7 @@ fn suspend_while_suspending() { &mut store, FuncType::new(None, None), move |mut caller, _params, _results| { - run(async_thunk.call_async(&mut caller, &[]))?; + run(async_thunk.call_async(&mut caller, &[], &mut []))?; Ok(()) }, ); @@ -249,7 +251,7 @@ fn suspend_while_suspending() { ) .await?; let func = instance.get_func(&mut store, "").unwrap(); - func.call_async(&mut store, &[]).await + 
func.call_async(&mut store, &[], &mut []).await }) .unwrap(); } @@ -277,7 +279,7 @@ fn cancel_during_run() { // Create our future, but as per async conventions this still doesn't // actually do anything. No wasm or host function has been called yet. - let mut future = Pin::from(Box::new(async_thunk.call_async(&mut store, &[]))); + let mut future = Pin::from(Box::new(async_thunk.call_async(&mut store, &[], &mut []))); // Push the future forward one tick, which actually runs the host code in // our async func. Our future is designed to be pending once, however. @@ -608,7 +610,7 @@ fn resume_separate_thread3() { // restored even though the asynchronous execution is suspended. Err::<(), _>(wasmtime::Trap::new("")) }); - assert!(f.call(&mut store, &[]).is_err()); + assert!(f.call(&mut store, &[], &mut []).is_err()); } #[test] @@ -636,7 +638,7 @@ fn recursive_async() -> Result<()> { Ok(()) }) }); - run(f2.call_async(&mut store, &[]))?; + run(f2.call_async(&mut store, &[], &mut []))?; Ok(()) } diff --git a/tests/all/call_hook.rs b/tests/all/call_hook.rs index bb63b611d1..e253519f80 100644 --- a/tests/all/call_hook.rs +++ b/tests/all/call_hook.rs @@ -31,6 +31,7 @@ fn call_wrapped_func() -> Result<(), Error> { f.call( &mut store, &[Val::I32(1), Val::I64(2), 3.0f32.into(), 4.0f64.into()], + &mut [], )?; // One switch from vm to host to call f, another in return from f. @@ -85,6 +86,7 @@ async fn call_wrapped_async_func() -> Result<(), Error> { f.call_async( &mut store, &[Val::I32(1), Val::I64(2), 3.0f32.into(), 4.0f64.into()], + &mut [], ) .await?; @@ -154,7 +156,7 @@ fn call_linked_func() -> Result<(), Error> { .into_func() .expect("export is func"); - export.call(&mut store, &[])?; + export.call(&mut store, &[], &mut [])?; // One switch from vm to host to call f, another in return from f. 
assert_eq!(store.data().calls_into_host, 1); @@ -225,7 +227,7 @@ async fn call_linked_func_async() -> Result<(), Error> { .into_func() .expect("export is func"); - export.call_async(&mut store, &[]).await?; + export.call_async(&mut store, &[], &mut []).await?; // One switch from vm to host to call f, another in return from f. assert_eq!(store.data().calls_into_host, 1); @@ -333,7 +335,7 @@ fn recursion() -> Result<(), Error> { // Recursion depth: let n: usize = 10; - export.call(&mut store, &[Val::I32(n as i32)])?; + export.call(&mut store, &[Val::I32(n as i32)], &mut [])?; // Recurse down to 0: n+1 calls assert_eq!(store.data().calls_into_host, n + 1); @@ -423,6 +425,7 @@ fn trapping() -> Result<(), Error> { let r = export.call( &mut store, &[Val::I32(action), Val::I32(if recur { 1 } else { 0 })], + &mut [], ); (store.into_data(), r.err()) }; diff --git a/tests/all/externals.rs b/tests/all/externals.rs index 847c44f7e7..04742fa506 100644 --- a/tests/all/externals.rs +++ b/tests/all/externals.rs @@ -114,16 +114,24 @@ fn cross_store() -> anyhow::Result<()> { let s1_f = s1_inst.get_func(&mut store1, "f").unwrap(); let s2_f = s2_inst.get_func(&mut store2, "f").unwrap(); - assert!(s1_f.call(&mut store1, &[Val::FuncRef(None)]).is_ok()); - assert!(s2_f.call(&mut store2, &[Val::FuncRef(None)]).is_ok()); - assert!(s1_f.call(&mut store1, &[Some(s1_f.clone()).into()]).is_ok()); assert!(s1_f - .call(&mut store1, &[Some(s2_f.clone()).into()]) + .call(&mut store1, &[Val::FuncRef(None)], &mut []) + .is_ok()); + assert!(s2_f + .call(&mut store2, &[Val::FuncRef(None)], &mut []) + .is_ok()); + assert!(s1_f + .call(&mut store1, &[Some(s1_f.clone()).into()], &mut []) + .is_ok()); + assert!(s1_f + .call(&mut store1, &[Some(s2_f.clone()).into()], &mut []) .is_err()); assert!(s2_f - .call(&mut store2, &[Some(s1_f.clone()).into()]) + .call(&mut store2, &[Some(s1_f.clone()).into()], &mut []) .is_err()); - assert!(s2_f.call(&mut store2, &[Some(s2_f.clone()).into()]).is_ok()); + 
assert!(s2_f + .call(&mut store2, &[Some(s2_f.clone()).into()], &mut []) + .is_ok()); let s1_f_t = s1_f.typed::, (), _>(&store1)?; let s2_f_t = s2_f.typed::, (), _>(&store2)?; diff --git a/tests/all/func.rs b/tests/all/func.rs index 64d0b3e63e..da4c630fa4 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -193,10 +193,12 @@ fn import_works() -> Result<()> { f.as_ref().unwrap().data().downcast_ref::().unwrap(), "hello" ); - assert_eq!( - g.as_ref().unwrap().call(&mut caller, &[]).unwrap()[0].unwrap_i32(), - 42 - ); + let mut results = [Val::I32(0)]; + g.as_ref() + .unwrap() + .call(&mut caller, &[], &mut results) + .unwrap(); + assert_eq!(results[0].unwrap_i32(), 42); assert_eq!(HITS.fetch_add(1, SeqCst), 3); }, ) @@ -211,6 +213,7 @@ fn import_works() -> Result<()> { Val::ExternRef(Some(ExternRef::new("hello".to_string()))), funcref, ], + &mut [], )?; assert_eq!(HITS.load(SeqCst), 4); Ok(()) @@ -222,7 +225,10 @@ fn trap_smoke() -> Result<()> { let f = Func::wrap(&mut store, || -> Result<(), Trap> { Err(Trap::new("test")) }); - let err = f.call(&mut store, &[]).unwrap_err().downcast::()?; + let err = f + .call(&mut store, &[], &mut []) + .unwrap_err() + .downcast::()?; assert!(err.to_string().contains("test")); assert!(err.i32_exit_status().is_none()); Ok(()) @@ -347,31 +353,29 @@ fn call_wrapped_func() -> Result<()> { f.call( &mut store, &[Val::I32(1), Val::I64(2), 3.0f32.into(), 4.0f64.into()], + &mut [], )?; f.typed::<(i32, i64, f32, f64), (), _>(&store)? 
.call(&mut store, (1, 2, 3.0, 4.0))?; + let mut results = [Val::I32(0)]; let f = Func::wrap(&mut store, || 1i32); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_i32(), 1); assert_eq!(f.typed::<(), i32, _>(&store)?.call(&mut store, ())?, 1); let f = Func::wrap(&mut store, || 2i64); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_i64(), 2); assert_eq!(f.typed::<(), i64, _>(&store)?.call(&mut store, ())?, 2); let f = Func::wrap(&mut store, || 3.0f32); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_f32(), 3.0); assert_eq!(f.typed::<(), f32, _>(&store)?.call(&mut store, ())?, 3.0); let f = Func::wrap(&mut store, || 4.0f64); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_f64(), 4.0); assert_eq!(f.typed::<(), f64, _>(&store)?.call(&mut store, ())?, 4.0); Ok(()) @@ -385,7 +389,7 @@ fn caller_memory() -> anyhow::Result<()> { assert!(c.get_export("y").is_none()); assert!(c.get_export("z").is_none()); }); - f.call(&mut store, &[])?; + f.call(&mut store, &[], &mut [])?; let f = Func::wrap(&mut store, |mut c: Caller<'_, ()>| { assert!(c.get_export("x").is_none()); @@ -447,7 +451,10 @@ fn func_write_nothing() -> anyhow::Result<()> { let mut store = Store::<()>::default(); let ty = FuncType::new(None, Some(ValType::I32)); let f = Func::new(&mut store, ty, |_, _, _| Ok(())); - let err = f.call(&mut store, &[]).unwrap_err().downcast::()?; + let err = f + .call(&mut store, &[], &mut [Val::I32(0)]) + .unwrap_err() + .downcast::()?; assert!(err .to_string() .contains("function attempted to return an incompatible value")); @@ -479,7 +486,7 @@ fn return_cross_store_value() -> anyhow::Result<()> { let 
instance = Instance::new(&mut store1, &module, &[return_cross_store_func.into()])?; let run = instance.get_func(&mut store1, "run").unwrap(); - let result = run.call(&mut store1, &[]); + let result = run.call(&mut store1, &[], &mut [Val::I32(0)]); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("cross-`Store`")); @@ -500,7 +507,11 @@ fn pass_cross_store_arg() -> anyhow::Result<()> { // Using regular `.call` fails with cross-Store arguments. assert!(store1_func - .call(&mut store1, &[Val::FuncRef(Some(store2_func.clone()))]) + .call( + &mut store1, + &[Val::FuncRef(Some(store2_func.clone()))], + &mut [] + ) .is_err()); // And using `.get` followed by a function call also fails with cross-Store @@ -553,7 +564,7 @@ fn trampolines_always_valid() -> anyhow::Result<()> { drop(module2); // ... and no segfaults! right? right? ... - func.call(&mut store, &[])?; + func.call(&mut store, &[], &mut [])?; Ok(()) } @@ -616,7 +627,7 @@ fn trap_doesnt_leak() -> anyhow::Result<()> { Err(Trap::new("")) }); assert!(f1.typed::<(), (), _>(&store)?.call(&mut store, ()).is_err()); - assert!(f1.call(&mut store, &[]).is_err()); + assert!(f1.call(&mut store, &[], &mut []).is_err()); // test that `Func::new` is correct let canary2 = Canary::default(); @@ -626,7 +637,7 @@ fn trap_doesnt_leak() -> anyhow::Result<()> { Err(Trap::new("")) }); assert!(f2.typed::<(), (), _>(&store)?.call(&mut store, ()).is_err()); - assert!(f2.call(&mut store, &[]).is_err()); + assert!(f2.call(&mut store, &[], &mut []).is_err()); // drop everything and ensure dtors are run drop(store); @@ -651,15 +662,18 @@ fn wrap_multiple_results() -> anyhow::Result<()> { + Sync, { let f = Func::wrap(&mut *store, move || t); - assert_eq!(f.typed::<(), T, _>(&store,)?.call(&mut *store, ())?, t); - assert!(t.eq_values(&f.call(&mut *store, &[])?)); + let mut results = vec![Val::I32(0); f.ty(&store).results().len()]; + assert_eq!(f.typed::<(), T, _>(&store)?.call(&mut *store, ())?, t); + f.call(&mut *store, 
&[], &mut results)?; + assert!(t.eq_values(&results)); let module = Module::new(store.engine(), &T::gen_wasm())?; let instance = Instance::new(&mut *store, &module, &[f.into()])?; let f = instance.get_func(&mut *store, "foo").unwrap(); assert_eq!(f.typed::<(), T, _>(&store)?.call(&mut *store, ())?, t); - assert!(t.eq_values(&f.call(&mut *store, &[])?)); + f.call(&mut *store, &[], &mut results)?; + assert!(t.eq_values(&results)); Ok(()) } @@ -820,7 +834,7 @@ fn trampoline_for_declared_elem() -> anyhow::Result<()> { let g = instance.get_typed_func::<(), Option, _>(&mut store, "g")?; let func = g.call(&mut store, ())?; - func.unwrap().call(&mut store, &[])?; + func.unwrap().call(&mut store, &[], &mut [])?; Ok(()) } diff --git a/tests/all/funcref.rs b/tests/all/funcref.rs index 676fd255e8..73dd21319d 100644 --- a/tests/all/funcref.rs +++ b/tests/all/funcref.rs @@ -20,8 +20,12 @@ fn pass_funcref_in_and_out_of_wasm() -> anyhow::Result<()> { // Pass in a non-null funcref. { - let results = func.call(&mut store, &[Val::FuncRef(Some(func.clone()))])?; - assert_eq!(results.len(), 1); + let mut results = [Val::I32(0)]; + func.call( + &mut store, + &[Val::FuncRef(Some(func.clone()))], + &mut results, + )?; // Can't compare `Func` for equality, so this is the best we can do here. let result_func = results[0].unwrap_funcref().unwrap(); @@ -30,9 +34,8 @@ fn pass_funcref_in_and_out_of_wasm() -> anyhow::Result<()> { // Pass in a null funcref. 
{ - let results = func.call(&mut store, &[Val::FuncRef(None)])?; - assert_eq!(results.len(), 1); - + let mut results = [Val::I32(0)]; + func.call(&mut store, &[Val::FuncRef(None)], &mut results)?; let result_func = results[0].unwrap_funcref(); assert!(result_func.is_none()); } @@ -42,9 +45,11 @@ fn pass_funcref_in_and_out_of_wasm() -> anyhow::Result<()> { let other_instance = Instance::new(&mut store, &module, &[])?; let other_instance_func = other_instance.get_func(&mut store, "func").unwrap(); - let results = func.call( + let mut results = [Val::I32(0)]; + func.call( &mut store, &[Val::FuncRef(Some(other_instance_func.clone()))], + &mut results, )?; assert_eq!(results.len(), 1); @@ -61,7 +66,9 @@ fn pass_funcref_in_and_out_of_wasm() -> anyhow::Result<()> { .get_func(&mut other_store, "f") .unwrap(); - assert!(func.call(&mut store, &[Val::FuncRef(Some(f))]).is_err()); + assert!(func + .call(&mut store, &[Val::FuncRef(Some(f))], &mut [Val::I32(0)]) + .is_err()); } Ok(()) @@ -82,9 +89,8 @@ fn receive_null_funcref_from_wasm() -> anyhow::Result<()> { let instance = Instance::new(&mut store, &module, &[])?; let get_null = instance.get_func(&mut store, "get-null").unwrap(); - let results = get_null.call(&mut store, &[])?; - assert_eq!(results.len(), 1); - + let mut results = [Val::I32(0)]; + get_null.call(&mut store, &[], &mut results)?; let result_func = results[0].unwrap_funcref(); assert!(result_func.is_none()); @@ -101,7 +107,7 @@ fn wrong_store() -> anyhow::Result<()> { let set = SetOnDrop(dropped.clone()); let f1 = Func::wrap(&mut store1, move || drop(&set)); let f2 = Func::wrap(&mut store2, move || Some(f1.clone())); - assert!(f2.call(&mut store2, &[]).is_err()); + assert!(f2.call(&mut store2, &[], &mut []).is_err()); } assert!(dropped.load(SeqCst)); @@ -133,7 +139,7 @@ fn func_new_returns_wrong_store() -> anyhow::Result<()> { Ok(()) }, ); - assert!(f2.call(&mut store2, &[]).is_err()); + assert!(f2.call(&mut store2, &[], &mut [Val::I32(0)]).is_err()); } 
assert!(dropped.load(SeqCst)); diff --git a/tests/all/gc.rs b/tests/all/gc.rs index 8092613eb5..73ffe03ece 100644 --- a/tests/all/gc.rs +++ b/tests/all/gc.rs @@ -47,7 +47,7 @@ fn smoke_test_gc() -> anyhow::Result<()> { let r = ExternRef::new(SetFlagOnDrop(inner_dropped.clone())); { let args = [Val::I32(5), Val::ExternRef(Some(r.clone()))]; - func.call(&mut store, &args)?; + func.call(&mut store, &args, &mut [Val::I32(0)])?; } // Still held alive by the `VMExternRefActivationsTable` (potentially in @@ -88,7 +88,7 @@ fn wasm_dropping_refs() -> anyhow::Result<()> { for _ in 0..4096 { let r = ExternRef::new(CountDrops(num_refs_dropped.clone())); let args = [Val::ExternRef(Some(r))]; - drop_ref.call(&mut store, &args)?; + drop_ref.call(&mut store, &args, &mut [])?; } assert!(num_refs_dropped.load(SeqCst) > 0); @@ -163,7 +163,7 @@ fn many_live_refs() -> anyhow::Result<()> { let instance = Instance::new(&mut store, &module, &[make_ref.into(), observe_ref.into()])?; let many_live_refs = instance.get_func(&mut store, "many_live_refs").unwrap(); - many_live_refs.call(&mut store, &[])?; + many_live_refs.call(&mut store, &[], &mut [])?; store.gc(); assert_eq!(live_refs.load(SeqCst), 0); @@ -214,7 +214,7 @@ fn drop_externref_via_table_set() -> anyhow::Result<()> { { let args = vec![Val::ExternRef(Some(foo))]; - table_set.call(&mut store, &args)?; + table_set.call(&mut store, &args, &mut [])?; } store.gc(); assert!(!foo_is_dropped.load(SeqCst)); @@ -222,13 +222,13 @@ fn drop_externref_via_table_set() -> anyhow::Result<()> { { let args = vec![Val::ExternRef(Some(bar))]; - table_set.call(&mut store, &args)?; + table_set.call(&mut store, &args, &mut [])?; } store.gc(); assert!(foo_is_dropped.load(SeqCst)); assert!(!bar_is_dropped.load(SeqCst)); - table_set.call(&mut store, &[Val::ExternRef(None)])?; + table_set.call(&mut store, &[Val::ExternRef(None)], &mut [])?; assert!(foo_is_dropped.load(SeqCst)); assert!(bar_is_dropped.load(SeqCst)); diff --git a/tests/all/host_funcs.rs 
b/tests/all/host_funcs.rs index d88faab143..3af5aa92d6 100644 --- a/tests/all/host_funcs.rs +++ b/tests/all/host_funcs.rs @@ -279,10 +279,12 @@ fn import_works() -> Result<()> { f.as_ref().unwrap().data().downcast_ref::().unwrap(), "hello" ); - assert_eq!( - g.as_ref().unwrap().call(&mut caller, &[]).unwrap()[0].unwrap_i32(), - 42 - ); + let mut results = [Val::I32(0)]; + g.as_ref() + .unwrap() + .call(&mut caller, &[], &mut results) + .unwrap(); + assert_eq!(results[0].unwrap_i32(), 42); assert_eq!(HITS.fetch_add(1, SeqCst), 3); }, )?; @@ -299,6 +301,7 @@ fn import_works() -> Result<()> { Val::ExternRef(Some(ExternRef::new("hello".to_string()))), funcref, ], + &mut [], )?; assert_eq!(HITS.load(SeqCst), 4); @@ -360,7 +363,7 @@ fn call_import_many_args() -> Result<()> { let mut store = Store::new(&engine, ()); let instance = linker.instantiate(&mut store, &module)?; let run = instance.get_func(&mut store, "run").unwrap(); - run.call(&mut store, &[])?; + run.call(&mut store, &[], &mut [])?; Ok(()) } @@ -422,6 +425,7 @@ fn call_wasm_many_args() -> Result<()> { 9.into(), 10.into(), ], + &mut [], )?; let typed_run = instance @@ -431,7 +435,7 @@ fn call_wasm_many_args() -> Result<()> { typed_run.call(&mut store, (1, 2, 3, 4, 5, 6, 7, 8, 9, 10))?; let test = instance.get_func(&mut store, "test").unwrap(); - test.call(&mut store, &[])?; + test.call(&mut store, &[], &mut [])?; Ok(()) } @@ -450,7 +454,10 @@ fn trap_smoke() -> Result<()> { .into_func() .unwrap(); - let err = f.call(&mut store, &[]).unwrap_err().downcast::()?; + let err = f + .call(&mut store, &[], &mut []) + .unwrap_err() + .downcast::()?; assert!(err.to_string().contains("test")); assert!(err.i32_exit_status().is_none()); @@ -524,6 +531,7 @@ fn new_from_signature() -> Result<()> { fn call_wrapped_func() -> Result<()> { let engine = Engine::default(); let mut linker = Linker::new(&engine); + let mut results = [Val::I32(0)]; linker.func_wrap("", "f1", |a: i32, b: i64, c: f32, d: f64| { assert_eq!(a, 1); @@ 
-550,6 +558,7 @@ fn call_wrapped_func() -> Result<()> { f.call( &mut store, &[Val::I32(1), Val::I64(2), 3.0f32.into(), 4.0f64.into()], + &mut [], )?; f.typed::<(i32, i64, f32, f64), (), _>(&store)? .call(&mut store, (1, 2, 3.0, 4.0))?; @@ -559,8 +568,7 @@ fn call_wrapped_func() -> Result<()> { .unwrap() .into_func() .unwrap(); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_i32(), 1); assert_eq!(f.typed::<(), i32, _>(&store)?.call(&mut store, ())?, 1); @@ -569,8 +577,7 @@ fn call_wrapped_func() -> Result<()> { .unwrap() .into_func() .unwrap(); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_i64(), 2); assert_eq!(f.typed::<(), i64, _>(&store)?.call(&mut store, ())?, 2); @@ -579,8 +586,7 @@ fn call_wrapped_func() -> Result<()> { .unwrap() .into_func() .unwrap(); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_f32(), 3.0); assert_eq!(f.typed::<(), f32, _>(&store)?.call(&mut store, ())?, 3.0); @@ -589,8 +595,7 @@ fn call_wrapped_func() -> Result<()> { .unwrap() .into_func() .unwrap(); - let results = f.call(&mut store, &[])?; - assert_eq!(results.len(), 1); + f.call(&mut store, &[], &mut results)?; assert_eq!(results[0].unwrap_f64(), 4.0); assert_eq!(f.typed::<(), f64, _>(&store)?.call(&mut store, ())?, 4.0); @@ -610,7 +615,10 @@ fn func_return_nothing() -> Result<()> { .unwrap() .into_func() .unwrap(); - let err = f.call(&mut store, &[]).unwrap_err().downcast::()?; + let err = f + .call(&mut store, &[], &mut [Val::I32(0)]) + .unwrap_err() + .downcast::()?; assert!(err .to_string() .contains("function attempted to return an incompatible value")); @@ -658,18 +666,17 @@ fn call_via_funcref() -> Result<()> { .unwrap() .into_func() .unwrap(); - let results = instance + let mut results = 
[Val::I32(0), Val::I32(0)]; + instance .get_func(&mut store, "call") .unwrap() - .call(&mut store, &[f.into()])?; - - assert_eq!(results.len(), 2); + .call(&mut store, &[f.into()], &mut results)?; assert_eq!(results[0].unwrap_i32(), 7); { let f = results[1].unwrap_funcref().unwrap(); - let results = f.call(&mut store, &[1.into(), 2.into()])?; - assert_eq!(results.len(), 1); + let mut results = [Val::I32(0)]; + f.call(&mut store, &[1.into(), 2.into()], &mut results)?; assert_eq!(results[0].unwrap_i32(), 3); } @@ -706,7 +713,7 @@ fn store_with_context() -> Result<()> { .unwrap() .into_func() .unwrap(); - f.call(&mut store, &[])?; + f.call(&mut store, &[], &mut [])?; assert!(store.data().called); diff --git a/tests/all/import_calling_export.rs b/tests/all/import_calling_export.rs index e26e545aa6..dd7342a054 100644 --- a/tests/all/import_calling_export.rs +++ b/tests/all/import_calling_export.rs @@ -24,7 +24,7 @@ fn test_import_calling_export() { caller .data() .unwrap() - .call(&mut caller, &[]) + .call(&mut caller, &[], &mut []) .expect("expected function not to trap"); Ok(()) }, @@ -44,7 +44,7 @@ fn test_import_calling_export() { *store.data_mut() = Some(other_func); run_func - .call(&mut store, &[]) + .call(&mut store, &[], &mut []) .expect("expected function not to trap"); } @@ -79,8 +79,9 @@ fn test_returns_incorrect_type() -> Result<()> { .get_func(&mut store, "run") .expect("expected a run func in the module"); + let mut result = [Val::I32(0)]; let trap = run_func - .call(&mut store, &[]) + .call(&mut store, &[], &mut result) .expect_err("the execution should fail") .downcast::()?; assert!(trap diff --git a/tests/all/invoke_func_via_table.rs b/tests/all/invoke_func_via_table.rs index ada0271881..a50d30ceee 100644 --- a/tests/all/invoke_func_via_table.rs +++ b/tests/all/invoke_func_via_table.rs @@ -26,7 +26,8 @@ fn test_invoke_func_via_table() -> Result<()> { .unwrap() .unwrap() .clone(); - let result = f.call(&mut store, &[]).unwrap(); - 
assert_eq!(result[0].unwrap_i64(), 42); + let mut results = [Val::I32(0)]; + f.call(&mut store, &[], &mut results).unwrap(); + assert_eq!(results[0].unwrap_i64(), 42); Ok(()) } diff --git a/tests/all/linker.rs b/tests/all/linker.rs index a5c3471278..f89adb0301 100644 --- a/tests/all/linker.rs +++ b/tests/all/linker.rs @@ -285,7 +285,7 @@ fn funcs_live_on_to_fight_another_day() -> Result<()> { assert_eq!(flag.load(SeqCst), 0); let mut store = Store::new(&engine, ()); let func = linker.get(&mut store, "", Some("")).unwrap(); - func.into_func().unwrap().call(&mut store, &[])?; + func.into_func().unwrap().call(&mut store, &[], &mut [])?; assert_eq!(flag.load(SeqCst), 0); Ok(()) }; diff --git a/tests/all/traps.rs b/tests/all/traps.rs index d2e707b13a..857c870359 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -329,17 +329,17 @@ fn mismatched_arguments() -> Result<()> { let instance = Instance::new(&mut store, &module, &[])?; let func = instance.get_func(&mut store, "foo").unwrap(); assert_eq!( - func.call(&mut store, &[]).unwrap_err().to_string(), + func.call(&mut store, &[], &mut []).unwrap_err().to_string(), "expected 1 arguments, got 0" ); assert_eq!( - func.call(&mut store, &[Val::F32(0)]) + func.call(&mut store, &[Val::F32(0)], &mut []) .unwrap_err() .to_string(), "argument type mismatch: found f32 but expected i32", ); assert_eq!( - func.call(&mut store, &[Val::I32(0), Val::I32(1)]) + func.call(&mut store, &[Val::I32(0), Val::I32(1)], &mut []) .unwrap_err() .to_string(), "expected 1 arguments, got 2" @@ -519,7 +519,10 @@ fn parse_dwarf_info() -> Result<()> { ); linker.module(&mut store, "", &module)?; let run = linker.get_default(&mut store, "")?; - let trap = run.call(&mut store, &[]).unwrap_err().downcast::()?; + let trap = run + .call(&mut store, &[], &mut []) + .unwrap_err() + .downcast::()?; let mut found = false; for frame in trap.trace() { diff --git a/tests/host_segfault.rs b/tests/host_segfault.rs index 57edb07bac..7a761678b8 100644 --- 
a/tests/host_segfault.rs +++ b/tests/host_segfault.rs @@ -139,7 +139,7 @@ fn main() { overrun_the_stack(); }) }); - run_future(f.call_async(&mut store, &[])).unwrap(); + run_future(f.call_async(&mut store, &[], &mut [])).unwrap(); unreachable!(); }, true, @@ -157,7 +157,7 @@ fn main() { overrun_the_stack(); }) }); - run_future(f.call_async(&mut store, &[])).unwrap(); + run_future(f.call_async(&mut store, &[], &mut [])).unwrap(); unreachable!(); }, true, From 1a5a2c7c5d43cfdf0c5086e83e3254ade7800e51 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 21 Sep 2021 15:30:07 -0500 Subject: [PATCH 26/93] Fix a merge conflict on `main` (#3378) This commit fixes a "merge conflict" with #3319 being merged into `main`, causing CI failures on merge. --- crates/wasmtime/src/func.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index abadbdb341..942c6ed426 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -766,8 +766,9 @@ impl Func { bail!("cross-`Store` values are not currently supported"); } } + let externref_params = ty.as_wasm_func_type().externref_params_count(); - let mut values_vec = write_params(store.0, params, results)?; + let mut values_vec = write_params(store.0, externref_params, params, results)?; // Call the trampoline. unsafe { @@ -789,6 +790,7 @@ impl Func { fn write_params( store: &mut StoreOpaque, + externref_params: usize, params: &[Val], results: &mut [Val], ) -> Result> { @@ -805,7 +807,7 @@ impl Func { // because otherwise we would either keep filling up the bump chunk // and making it larger and larger or we would always take the slow // path when inserting references into the table. 
- if ty.as_wasm_func_type().externref_params_count() + if externref_params > store .externref_activations_table() .bump_capacity_remaining() From 335177a97e1365d7b8e0091647d521a8a37bfa26 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 09:42:18 +0100 Subject: [PATCH 27/93] Remove legacy backend from test Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-vselect.clif | 2 -- 1 file changed, 2 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 8a78664f5b..5c910df60d 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -4,8 +4,6 @@ test run target aarch64 set enable_simd target x86_64 machinst -set enable_simd -target x86_64 legacy haswell function %vselect_i8x16() -> i8x16 { block0: From 63d85e1dc3dc864a10d748c3d4465ef17709319c Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 11:43:57 +0100 Subject: [PATCH 28/93] Prevent running `simd-vhighbits.clif` on legacy backend. Copyright (c) 2021, Arm Limited. 
--- cranelift/filetests/filetests/runtests/simd-vhighbits.clif | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif index 2a9c5d1a75..a5d7146f11 100644 --- a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %vhighbits_i8x16(i8x16) -> i16 { block0(v0: i8x16): From 02ff19f2fc1d6e43e4778fd563ac41d95f8062ae Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 8 Sep 2021 09:18:38 +0100 Subject: [PATCH 29/93] Implement `SqmulRoundSat` for interpreter Implemented `SqmulRoundSat` for the Cranelift interpreter, performing QN-format fixed point multiplication for 16 and 32-bit integers in SIMD vectors. Copyright (c) 2021, Arm Limited --- .../runtests/simd-sqmulroundsat-aarch64.clif | 12 ++++++++ .../runtests/simd-sqmulroundsat.clif | 13 +++++++++ cranelift/interpreter/src/step.rs | 29 ++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif new file mode 100644 index 0000000000..f6809ddc5c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif @@ -0,0 +1,12 @@ +test interpret +test run +target aarch64 +;; x86_64 hasn't implemented this for `i32x4` + +function %sqmulrs_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i32x4([1000 2000 3000 4000], [10000 100000 1000000 10000000]) == [0 0 1 19] +; run: 
%sqmulrs_i32x4([2147483647 -2147483648 -2147483648 0], [2147483647 -2147483648 2147483647 0]) == [2147483646 2147483647 -2147483647 0] diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif new file mode 100644 index 0000000000..1faa3592ad --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -0,0 +1,13 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i16x8([1 2 3 4 5 6 7 8], [1 10 100 1000 10000 15000 20000 25000]) == [0 0 0 0 2 3 4 6] +; run: %sqmulrs_i16x8([32767 32767 -32768 -32768 -32768 -32768 0 0], [32767 32767 -32768 -32768 32767 32767 0 0]) == [32766 32766 32767 32767 -32767 -32767 0 0] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 7fc16b06f7..8699f484b0 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -941,7 +941,34 @@ where .collect::>>()?; assign(vectorizelanes(&new_vec, new_type)?) } - Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), + Opcode::SqmulRoundSat => { + let lane_type = ctrl_ty.lane_type(); + let double_width = ctrl_ty.double_width().unwrap().lane_type(); + let arg0 = extractlanes(&arg(0)?, lane_type)?; + let arg1 = extractlanes(&arg(1)?, lane_type)?; + let (min, max) = lane_type.bounds(true); + let min: V = Value::int(min as i128, double_width)?; + let max: V = Value::int(max as i128, double_width)?; + let new_vec = arg0 + .into_iter() + .zip(arg1.into_iter()) + .map(|(x, y)| { + let x = x.into_int()?; + let y = y.into_int()?; + // temporarily double width of the value to avoid overflow. + let z: V = Value::int( + (x * y + (1 << (lane_type.bits() - 2))) >> (lane_type.bits() - 1), + double_width, + )?; + // check bounds, saturate, and truncate to correct width. 
+ let z = Value::min(z, max.clone())?; + let z = Value::max(z, min.clone())?; + let z = z.convert(ValueConversionKind::Truncate(lane_type))?; + Ok(z) + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), // TODO: these instructions should be removed once the new backend makes these obsolete From cb30ecc7bcc9602881ea401b51dd54c6812674ce Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Tue, 7 Sep 2021 12:19:00 +0100 Subject: [PATCH 30/93] Implement `Iabs` for the interpreter Implemented `Iabs` to return the absolute integer value with wrapping. Copyright (c) 2021, Arm Limited --- .../filetests/filetests/runtests/iabs.clif | 42 +++++++++++++++++++ .../filetests/runtests/simd-iabs.clif | 34 +++++++++++++++ cranelift/interpreter/src/step.rs | 25 ++++++++++- 3 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/runtests/iabs.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-iabs.clif diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif new file mode 100644 index 0000000000..acf2bf8584 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -0,0 +1,42 @@ +test interpret +; aarch64 & x86_64 only support vector iabs + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs v0 + return v1 +} +; run: %iabs_i8(0) == 0 +; run: %iabs_i8(127) == 127 +; run: %iabs_i8(-127) == 127 +; run: %iabs_i8(-128) == -128 + +function %iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs v0 + return v1 +} +; run: %iabs_i16(0) == 0 +; run: %iabs_i16(32767) == 32767 +; run: %iabs_i16(-32767) == 32767 +; run: %iabs_i16(-32768) == -32768 + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + return v1 +} +; run: %iabs_i32(0) == 0 +; run: %iabs_i32(2147483647) == 2147483647 +; run: %iabs_i32(-2147483647) == 2147483647 +; run: 
%iabs_i32(-2147483648) == -2147483648 + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs v0 + return v1 +} +; run: %iabs_i64(0) == 0 +; run: %iabs_i64(9223372036854775807) == 9223372036854775807 +; run: %iabs_i64(-9223372036854775807) == 9223372036854775807 +; run: %iabs_i64(-9223372036854775808) == -9223372036854775808 \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif new file mode 100644 index 0000000000..ee1db6762c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -0,0 +1,34 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %iabs_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iabs v0 + return v1 +} +; run: %iabs_i8x16([0 0 0 0 127 127 127 127 -127 -127 -127 -127 -128 -128 -128 -128]) == [0 0 0 0 127 127 127 127 127 127 127 127 -128 -128 -128 -128] + +function %iabs_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iabs v0 + return v1 +} +; run: %iabs_i16x8([0 0 32767 32767 -32767 -32767 -32768 -32768]) == [0 0 32767 32767 32767 32767 -32768 -32768] + +function %iabs_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iabs v0 + return v1 +} +; run: %iabs_i32x4([0 2147483647 -2147483647 -2147483648]) == [0 2147483647 2147483647 -2147483648] + +function %iabs_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iabs v0 + return v1 +} +; run: %iabs_i64x2([0 9223372036854775807]) == [0 9223372036854775807] +; run: %iabs_i64x2([-9223372036854775807 -9223372036854775808]) == [9223372036854775807 -9223372036854775808] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 7fc16b06f7..f55421420a 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -557,7 +557,30 @@ where false, )?), Opcode::Ineg => binary(Value::sub, Value::int(0, ctrl_ty)?, arg(0)?)?, - Opcode::Iabs => unimplemented!("Iabs"), + Opcode::Iabs => { + let 
(min_val, _) = ctrl_ty.lane_type().bounds(true); + let min_val: V = Value::int(min_val as i128, ctrl_ty.lane_type())?; + if ctrl_ty.is_vector() { + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let new_vec = arg0 + .into_iter() + .map(|lane| { + if Value::eq(&lane, &min_val)? { + Ok(min_val.clone()) + } else { + Value::int(lane.into_int()?.abs(), ctrl_ty.lane_type()) + } + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } else { + assign(if Value::eq(&arg(0)?, &min_val)? { + min_val.clone() + } else { + Value::int(arg(0)?.into_int()?.abs(), ctrl_ty.lane_type())? + }) + } + } Opcode::Imul => binary(Value::mul, arg(0)?, arg(1)?)?, Opcode::Umulhi | Opcode::Smulhi => { let double_length = match ctrl_ty.lane_bits() { From 539b1de5f4894d21d7bef422978a11254ce91882 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 13:48:59 +0100 Subject: [PATCH 31/93] Prevent test running on legacy backend. Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-iabs.clif | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif index ee1db6762c..022815c4a4 100644 --- a/cranelift/filetests/filetests/runtests/simd-iabs.clif +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %iabs_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): From faaf6b537a5622efd1702693b0b04842a434df33 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 13:50:31 +0100 Subject: [PATCH 32/93] Prevent running tests on legacy backend. 
Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif index 1faa3592ad..a6ada04f22 100644 --- a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { block0(v0: i16x8, v1: i16x8): From 5cdaf3d08516a773f1099288ade5398aebdba848 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 22 Sep 2021 07:12:33 -0700 Subject: [PATCH 33/93] Disable simd in the instantiate-swarm target Something I forgot from #3376 --- fuzz/fuzz_targets/instantiate-swarm.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/fuzz/fuzz_targets/instantiate-swarm.rs b/fuzz/fuzz_targets/instantiate-swarm.rs index 676d6f780b..0ebc708580 100644 --- a/fuzz/fuzz_targets/instantiate-swarm.rs +++ b/fuzz/fuzz_targets/instantiate-swarm.rs @@ -25,7 +25,6 @@ fn run(data: &[u8]) -> Result<()> { // that aren't otherwise enabled by default. We want to test all of these in // Wasmtime. 
let mut config: SwarmConfig = u.arbitrary()?; - config.simd_enabled = u.arbitrary()?; config.module_linking_enabled = u.arbitrary()?; config.memory64_enabled = u.arbitrary()?; // Don't generate modules that allocate more than 6GB @@ -35,7 +34,6 @@ fn run(data: &[u8]) -> Result<()> { let mut cfg = wasmtime_fuzzing::fuzz_default_config(Strategy::Auto).unwrap(); cfg.wasm_multi_memory(config.max_memories > 1); cfg.wasm_module_linking(config.module_linking_enabled); - cfg.wasm_simd(config.simd_enabled); cfg.wasm_memory64(config.memory64_enabled); oracles::instantiate_with_config(&module.to_bytes(), true, cfg, timeout); From b5e289d319b2788bb4b6133792546007f7900443 Mon Sep 17 00:00:00 2001 From: Olivier Lemasle Date: Wed, 22 Sep 2021 22:08:35 +0200 Subject: [PATCH 34/93] Add license file to wasmtime-types The LICENSE file is missing in wasmtime-types crate. As per the Apache 2.0 license, the license file itself should be redistributed with the source code. --- crates/types/LICENSE | 220 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 crates/types/LICENSE diff --git a/crates/types/LICENSE b/crates/types/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/crates/types/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + From 930b1f17f0f1433fa7c97a52759c25d36abe56c6 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Tue, 14 Sep 2021 22:05:26 +0100 Subject: [PATCH 35/93] Cranelift AArch64: Implement scalar FmaxPseudo and FminPseudo Copyright (c) 2021, Arm Limited. 
--- .../codegen/src/isa/aarch64/lower_inst.rs | 66 ++++++++++++------- .../filetests/runtests/fmin-max-pseudo.clif | 2 +- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c4eebe15b0..655ea8458c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2926,42 +2926,62 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::FminPseudo | Opcode::FmaxPseudo => { - let ty = ctx.input_ty(insn, 0); - if ty == F32X4 || ty == F64X2 { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let (ra, rb) = if op == Opcode::FminPseudo { + (rm, rn) + } else { + (rn, rm) + }; + let ty = ty.unwrap(); + let lane_type = ty.lane_type(); + + debug_assert!(lane_type == F32 || lane_type == F64); + + if ty.is_vector() { + let size = VectorSize::from_ty(ty); + // pmin(a,b) => bitsel(b, a, cmpgt(a, b)) // pmax(a,b) => bitsel(b, a, cmpgt(b, a)) - let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - // Since we're going to write the output register `r_dst` anyway, we might as - // well first use it to hold the comparison result. This has the slightly unusual + // Since we're going to write the output register `rd` anyway, we might as well + // first use it to hold the comparison result. This has the slightly unusual // effect that we modify the output register in the first instruction (`fcmgt`) // but read both the inputs again in the second instruction (`bsl`), which means // that the output register can't be either of the input registers. Regalloc // should handle this correctly, nevertheless. 
ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Fcmgt, - rd: r_dst, - rn: if op == Opcode::FminPseudo { r_a } else { r_b }, - rm: if op == Opcode::FminPseudo { r_b } else { r_a }, - size: if ty == F32X4 { - VectorSize::Size32x4 - } else { - VectorSize::Size64x2 - }, + rd, + rn: ra, + rm: rb, + size, }); ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Bsl, - rd: r_dst, - rn: r_b, - rm: r_a, - size: VectorSize::Size8x16, + rd, + rn, + rm, + size, }); } else { - return Err(CodegenError::Unsupported(format!( - "{}: Unsupported type: {:?}", - op, ty - ))); + if lane_type == F32 { + ctx.emit(Inst::FpuCmp32 { rn: ra, rm: rb }); + ctx.emit(Inst::FpuCSel32 { + rd, + rn, + rm, + cond: Cond::Gt, + }); + } else { + ctx.emit(Inst::FpuCmp64 { rn: ra, rm: rb }); + ctx.emit(Inst::FpuCSel64 { + rd, + rn, + rm, + cond: Cond::Gt, + }); + } } } diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif index a1273f9063..7fd70504f1 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif @@ -1,6 +1,6 @@ test run ; target s390x TODO: Not yet implemented on s390x -; target aarch64 TODO: Not yet implemented on aarch64 +target aarch64 set enable_simd target x86_64 machinst skylake From 476d0bee960b77e0bc9f50621952e6eb23639272 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 23 Sep 2021 08:48:11 -0700 Subject: [PATCH 36/93] Allow another trap mismatch with v8 If Wasmtime thinks a module stack-overflows and v8 says that it does something else that's ok. This means that the limits on v8 and Wasmtime are different which is expected and not something we want fuzz-bugs about. 
--- crates/fuzzing/src/oracles/v8.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/fuzzing/src/oracles/v8.rs b/crates/fuzzing/src/oracles/v8.rs index ff16757c00..4016b1ffdf 100644 --- a/crates/fuzzing/src/oracles/v8.rs +++ b/crates/fuzzing/src/oracles/v8.rs @@ -287,7 +287,18 @@ fn assert_error_matches(wasmtime: &anyhow::Error, v8: &str) { TrapCode::IntegerDivisionByZero => { return verify_v8(&["divide by zero", "remainder by zero"]) } - TrapCode::StackOverflow => return verify_v8(&["call stack size exceeded"]), + TrapCode::StackOverflow => { + return verify_v8(&[ + "call stack size exceeded", + // Similar to the above comment in `UnreachableCodeReached` + // if wasmtime hits a stack overflow but v8 ran all the way + // to when the `unreachable` instruction was hit then that's + // ok. This just means that wasmtime either has less optimal + // codegen or different limits on the stack than v8 does, + // which isn't an issue per-se. + "unreachable", + ]); + } TrapCode::IndirectCallToNull => return verify_v8(&["null function"]), TrapCode::TableOutOfBounds => { return verify_v8(&[ From 0fb3acfb94516d7171a30353a308476dee70792b Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Fri, 10 Sep 2021 17:23:24 +0100 Subject: [PATCH 37/93] Cranelift AArch64: Fix the VanyTrue implementation for 64-bit elements Copyright (c) 2021, Arm Limited. 
--- .../codegen/src/isa/aarch64/inst/emit.rs | 6 +- .../codegen/src/isa/aarch64/lower_inst.rs | 10 ++- .../filetests/runtests/simd-logical.clif | 74 +++++++++++++++++-- .../filetests/runtests/simd-vanytrue.clif | 2 +- 4 files changed, 83 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index d8216972ab..19c2764e94 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2338,7 +2338,11 @@ impl MachInstEmit for Inst { VecALUOp::Orr => (0b000_01110_10_1, 0b000111), VecALUOp::Eor => (0b001_01110_00_1, 0b000111), VecALUOp::Bsl => (0b001_01110_01_1, 0b000111), - VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001), + VecALUOp::Umaxp => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b101001) + } VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Mul => { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c4eebe15b0..c7ce1a58af 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2397,7 +2397,15 @@ pub(crate) fn lower_insn_to_regs>( // cmp xm, #0 // cset xm, ne - let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); + let s = VectorSize::from_ty(src_ty); + let size = if s == VectorSize::Size64x2 { + // `vall_true` with 64-bit elements is handled elsewhere. 
+ debug_assert_ne!(op, Opcode::VallTrue); + + VectorSize::Size32x4 + } else { + s + }; if op == Opcode::VanyTrue { ctx.emit(Inst::VecRRR { diff --git a/cranelift/filetests/filetests/runtests/simd-logical.clif b/cranelift/filetests/filetests/runtests/simd-logical.clif index 48470cdb35..6480dd98f7 100644 --- a/cranelift/filetests/filetests/runtests/simd-logical.clif +++ b/cranelift/filetests/filetests/runtests/simd-logical.clif @@ -3,8 +3,6 @@ target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd target x86_64 machinst -set enable_simd -target x86_64 legacy skylake function %bnot() -> b32 { block0: @@ -26,13 +24,77 @@ block0: } ; run -function %vany_true_i16x8() -> b1 { +function %vany_true_i8x16() -> b1, b1 { block0: - v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] v1 = vany_true v0 - return v1 + + v2 = vconst.i8x16 [0 0 0 1 0 0 0 0 0 0 42 0 0 0 0 0] + v3 = vany_true v2 + + return v1, v3 } -; run +; run: %vany_true_i8x16() == [false, true] + +function %vany_true_i16x8() -> b1, b1 { +block0: + v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v1 = vany_true v0 + + v2 = vconst.i16x8 [0 0 42 0 0 0 0 0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i16x8() == [false, true] + +function %vany_true_i32x4() -> b1, b1 { +block0: + v0 = vconst.i32x4 [0 0 0 0] + v1 = vany_true v0 + + v2 = vconst.i32x4 [0 42 0 0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i32x4() == [false, true] + +function %vany_true_i64x2() -> b1, b1 { +block0: + v0 = vconst.i64x2 [0 0] + v1 = vany_true v0 + + v2 = vconst.i64x2 [0 1] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i64x2() == [false, true] + +function %vany_true_f32x4() -> b1, b1 { +block0: + v0 = vconst.f32x4 [0.0 0.0 0.0 0.0] + v1 = vany_true v0 + + v2 = vconst.f32x4 [0.0 0x4.2 0.0 0.0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_f32x4() == [false, true] + +function %vany_true_f64x2() -> b1, b1 { +block0: + v0 = vconst.f64x2 
[0.0 0.0] + v1 = vany_true v0 + + v2 = vconst.f64x2 [0.0 0x1.0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_f64x2() == [false, true] function %vany_true_b32x4() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif index fd206d54ef..cf407ea606 100644 --- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif @@ -1,7 +1,7 @@ test interpret test run +target aarch64 target x86_64 machinst -; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304 function %vany_true_b8x16(b8x16) -> b1 { block0(v0: b8x16): From 1a778c2fe4284184fa36327cf24ac5ed9a5bf027 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 24 Sep 2021 09:19:56 -0700 Subject: [PATCH 38/93] Cranelift: add agenda item for further discussion of ISLE in upcoming Oct 4 meeting. --- meetings/cranelift/2021/cranelift-10-04.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meetings/cranelift/2021/cranelift-10-04.md b/meetings/cranelift/2021/cranelift-10-04.md index c65f8e031d..f20701f7a2 100644 --- a/meetings/cranelift/2021/cranelift-10-04.md +++ b/meetings/cranelift/2021/cranelift-10-04.md @@ -11,7 +11,7 @@ 1. Announcements 1. _Sumbit a PR to add your announcement here_ 1. Other agenda items - 1. _Sumbit a PR to add your item here_ + 1. Further discussion of [ISLE DSL](https://github.com/bytecodealliance/rfcs/pull/15) ## Notes From bfdbd10a13ec52b7667754000f14b5ef5f5c1c8c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 24 Sep 2021 14:05:45 -0500 Subject: [PATCH 39/93] Add `*_unchecked` variants of `Func` APIs for the C API (#3350) * Add `*_unchecked` variants of `Func` APIs for the C API This commit is what is hopefully going to be my last installment within the saga of optimizing function calls in/out of WebAssembly modules in the C API. 
This is yet another alternative approach to #3345 (sorry) but also contains everything necessary to make the C API fast. As in #3345 the general idea is just moving checks out of the call path in the same style of `TypedFunc`. This new strategy takes inspiration from lessons learned in previous attempts and effectively "just" exposes how we previously passed `*mut u128` through trampolines for arguments/results. This storage format is formalized through a new `ValRaw` union that is exposed from the `wasmtime` crate. By doing this it made it relatively easy to expose two new APIs: * `Func::new_unchecked` * `Func::call_unchecked` These are the same as their checked equivalents except that they're `unsafe` and they work with `*mut ValRaw` rather than safe slices of `Val`. Working with these eschews type checks and such and requires callers/embedders to do the right thing. These two new functions are then exposed via the C API with new functions, enabling C to have a fast-path of calling/defining functions. This fast path is akin to `Func::wrap` in Rust, although that API can't be built in C due to C not having generics in the same way that Rust has. For some benchmarks, the benchmarks here are: * `nop` - Call a wasm function from the host that does nothing and returns nothing. * `i64` - Call a wasm function from the host, the wasm function calls a host function, and the host function returns an `i64` all the way out to the original caller. * `many` - Call a wasm function from the host, the wasm calls a host function with 5 `i32` parameters, and then an `i64` result is returned back to the original host * `i64` host - just the overhead of the wasm calling the host, so the wasm calls the host function in a loop. * `many` host - same as `i64` host, but calling the `many` host function. All numbers in this table are in nanoseconds, and this is just one measurement as well so there's bound to be some variation in the precise numbers here.
| Name | Rust | C (before) | C (after) | |-----------|------|------------|-----------| | nop | 19 | 112 | 25 | | i64 | 22 | 207 | 32 | | many | 27 | 189 | 34 | | i64 host | 2 | 38 | 5 | | many host | 7 | 75 | 8 | The main conclusion here is that the C API is significantly faster than before when using the `*_unchecked` variants of APIs. The Rust implementation is still the ceiling (or floor I guess?) for performance The main reason that C is slower than Rust is that a little bit more has to travel through memory where on the Rust side of things we can monomorphize and inline a bit more to get rid of that. Overall though the costs are way way down from where they were originally and I don't plan on doing a whole lot more myself at this time. There's various things we theoretically could do I've considered but implementation-wise I think they'll be much more weighty. * Tweak `wasmtime_externref_t` API comments --- crates/c-api/include/wasmtime/func.h | 128 ++++++++++++ crates/c-api/include/wasmtime/linker.h | 23 +++ crates/c-api/include/wasmtime/val.h | 60 ++++++ crates/c-api/src/func.rs | 60 +++++- crates/c-api/src/linker.rs | 29 ++- crates/c-api/src/val.rs | 24 ++- crates/runtime/src/externref.rs | 1 + crates/runtime/src/lib.rs | 2 +- crates/runtime/src/vmcontext.rs | 20 +- crates/wasmtime/src/externals.rs | 30 +-- crates/wasmtime/src/func.rs | 270 ++++++++++++++++--------- crates/wasmtime/src/linker.rs | 20 +- crates/wasmtime/src/ref.rs | 59 ++++++ crates/wasmtime/src/store.rs | 16 +- crates/wasmtime/src/trampoline/func.rs | 8 +- crates/wasmtime/src/values.rs | 126 +++++------- 16 files changed, 659 insertions(+), 217 deletions(-) diff --git a/crates/c-api/include/wasmtime/func.h b/crates/c-api/include/wasmtime/func.h index f254d922aa..83339e8b2b 100644 --- a/crates/c-api/include/wasmtime/func.h +++ b/crates/c-api/include/wasmtime/func.h @@ -87,6 +87,75 @@ WASM_API_EXTERN void wasmtime_func_new( wasmtime_func_t *ret ); +/** + * \brief Callback signature for 
#wasmtime_func_new_unchecked. + * + * This is the function signature for host functions that can be made accessible + * to WebAssembly. The arguments to this function are: + * + * \param env user-provided argument passed to #wasmtime_func_new_unchecked + * \param caller a temporary object that can only be used during this function + * call. Used to acquire #wasmtime_context_t or caller's state + * \param args_and_results storage space for both the parameters to the + * function as well as the results of the function. The size of this + * array depends on the function type that the host function is created + * with, but it will be the maximum of the number of parameters and + * number of results. + * + * This callback can optionally return a #wasm_trap_t indicating that a trap + * should be raised in WebAssembly. It's expected that in this case the caller + * relinquishes ownership of the trap and it is passed back to the engine. + * + * This differs from #wasmtime_func_callback_t in that the payload of + * `args_and_results` does not have type information, nor does it have sizing + * information. This is especially unsafe because it's only valid within the + * particular #wasm_functype_t that the function was created with. The onus is + * on the embedder to ensure that `args_and_results` are all read correctly + * for parameters and all written for results within the execution of a + * function. + * + * Parameters will be listed starting at index 0 in the `args_and_results` + * array. Results are also written starting at index 0, which will overwrite + * the arguments. + */ +typedef wasm_trap_t* (*wasmtime_func_unchecked_callback_t)( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results); + +/** + * \brief Creates a new host function in the same manner of #wasmtime_func_new, + * but the function-to-call has no type information available at runtime. + * + * This function is very similar to #wasmtime_func_new. 
The difference is that + * this version is "more unsafe" in that when the host callback is invoked there + * is no type information and no checks that the right types of values are + * produced. The onus is on the consumer of this API to ensure that all + * invariants are upheld such as: + * + * * The host callback reads parameters correctly and interprets their types + * correctly. + * * If a trap doesn't happen then all results are written to the results + * pointer. All results must have the correct type. + * * Types such as `funcref` cannot cross stores. + * * Types such as `externref` have valid reference counts. + * + * It's generally only recommended to use this if your application can wrap + * this in a safe embedding. This should not be frequently used due to the + * number of invariants that must be upheld on the wasm<->host boundary. On the + * upside, though, this flavor of host function will be faster to call than + * those created by #wasmtime_func_new (hence the reason for this function's + * existence). + */ +WASM_API_EXTERN void wasmtime_func_new_unchecked( + wasmtime_context_t *store, + const wasm_functype_t* type, + wasmtime_func_unchecked_callback_t callback, + void *env, + void (*finalizer)(void*), + wasmtime_func_t *ret +); + /** * \brief Returns the type of the function specified * @@ -142,6 +211,39 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_func_call( wasm_trap_t **trap ); +/** + * \brief Call a WebAssembly function in an "unchecked" fashion. + * + * This function is similar to #wasmtime_func_call except that there is no type + * information provided with the arguments (or sizing information). Consequently + * this is less safe to call since it's up to the caller to ensure that `args` + * has an appropriate size and all the parameters are configured with their + * appropriate values/types. Additionally all the results must be interpreted + * correctly if this function returns successfully. 
+ * + * Parameters must be specified starting at index 0 in the `args_and_results` + * array. Results are written starting at index 0, which will overwrite + * the arguments. + * + * Callers must ensure that various correctness variants are upheld when this + * API is called such as: + * + * * The `args_and_results` pointer has enough space to hold all the parameters + * and all the results (but not at the same time). + * * Parameters must all be configured as if they were the correct type. + * * Values such as `externref` and `funcref` are valid within the store being + * called. + * + * When in doubt it's much safer to call #wasmtime_func_call. This function is + * faster than that function, but the tradeoff is that embeddings must uphold + * more invariants rather than relying on Wasmtime to check them for you. + */ +WASM_API_EXTERN wasm_trap_t *wasmtime_func_call_unchecked( + wasmtime_context_t *store, + const wasmtime_func_t *func, + wasmtime_val_raw_t *args_and_results +); + /** * \brief Loads a #wasmtime_extern_t from the caller's context * @@ -172,6 +274,32 @@ WASM_API_EXTERN bool wasmtime_caller_export_get( */ WASM_API_EXTERN wasmtime_context_t* wasmtime_caller_context(wasmtime_caller_t* caller); +/** + * \brief Converts a `raw` nonzero `funcref` value from #wasmtime_val_raw_t + * into a #wasmtime_func_t. + * + * This function can be used to interpret nonzero values of the `funcref` field + * of the #wasmtime_val_raw_t structure. It is assumed that `raw` does not have + * a value of 0, or otherwise the program will abort. + * + * Note that this function is unchecked and unsafe. It's only safe to pass + * values learned from #wasmtime_val_raw_t with the same corresponding + * #wasmtime_context_t that they were produced from. Providing arbitrary values + * to `raw` here or cross-context values with `context` is UB. 
+ */ +WASM_API_EXTERN void wasmtime_func_from_raw( + wasmtime_context_t* context, + size_t raw, + wasmtime_func_t *ret); + +/** + * \brief Converts a `func` which belongs to `context` into a `usize` + * parameter that is suitable for insertion into a #wasmtime_val_raw_t. + */ +WASM_API_EXTERN size_t wasmtime_func_to_raw( + wasmtime_context_t* context, + const wasmtime_func_t *func); + #ifdef __cplusplus } // extern "C" #endif diff --git a/crates/c-api/include/wasmtime/linker.h b/crates/c-api/include/wasmtime/linker.h index 09bc0bb10a..edd52442df 100644 --- a/crates/c-api/include/wasmtime/linker.h +++ b/crates/c-api/include/wasmtime/linker.h @@ -102,6 +102,8 @@ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define( * Note that this function does not create a #wasmtime_func_t. This creates a * store-independent function within the linker, allowing this function * definition to be used with multiple stores. + * + * For more information about host callbacks see #wasmtime_func_new. */ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func( wasmtime_linker_t *linker, @@ -115,6 +117,27 @@ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func( void (*finalizer)(void*) ); +/** + * \brief Defines a new function in this linker. + * + * This is the same as #wasmtime_linker_define_func except that it's the analog + * of #wasmtime_func_new_unchecked instead of #wasmtime_func_new. Be sure to + * consult the documentation of #wasmtime_linker_define_func for argument + * information as well as #wasmtime_func_new_unchecked for why this is an + * unsafe API. + */ +WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func_unchecked( + wasmtime_linker_t *linker, + const char *module, + size_t module_len, + const char *name, + size_t name_len, + const wasm_functype_t *ty, + wasmtime_func_unchecked_callback_t cb, + void *data, + void (*finalizer)(void*) +); + /** * \brief Defines WASI functions in this linker. 
* diff --git a/crates/c-api/include/wasmtime/val.h b/crates/c-api/include/wasmtime/val.h index 43b40ff77e..c17f605cd8 100644 --- a/crates/c-api/include/wasmtime/val.h +++ b/crates/c-api/include/wasmtime/val.h @@ -63,6 +63,29 @@ WASM_API_EXTERN wasmtime_externref_t *wasmtime_externref_clone(wasmtime_externre */ WASM_API_EXTERN void wasmtime_externref_delete(wasmtime_externref_t *ref); +/** + * \brief Converts a raw `externref` value coming from #wasmtime_val_raw_t into + * a #wasmtime_externref_t. + * + * Note that the returned #wasmtime_externref_t is an owned value that must be + * deleted via #wasmtime_externref_delete by the caller if it is non-null. + */ +WASM_API_EXTERN wasmtime_externref_t *wasmtime_externref_from_raw(wasmtime_context_t *context, size_t raw); + +/** + * \brief Converts a #wasmtime_externref_t to a raw value suitable for storing + * into a #wasmtime_val_raw_t. + * + * Note that the returned underlying value is not tracked by Wasmtime's garbage + * collector until it enters WebAssembly. This means that a GC may release the + * context's reference to the raw value, making the raw value invalid within the + * context of the store. Do not perform a GC between calling this function and + * passing it to WebAssembly. + */ +WASM_API_EXTERN size_t wasmtime_externref_to_raw( + wasmtime_context_t *context, + const wasmtime_externref_t *ref); + /// \brief Discriminant stored in #wasmtime_val::kind typedef uint8_t wasmtime_valkind_t; /// \brief Value of #wasmtime_valkind_t meaning that #wasmtime_val_t is an i32 @@ -117,6 +140,43 @@ typedef union wasmtime_valunion { wasmtime_v128 v128; } wasmtime_valunion_t; +/** + * \typedef wasmtime_val_raw_t + * \brief Convenience alias for #wasmtime_val_raw + * + * \union wasmtime_val_raw + * \brief Container for possible wasm values. + * + * This type is used on conjunction with #wasmtime_func_new_unchecked as well + * as #wasmtime_func_call_unchecked. 
Instances of this type do not have type + * information associated with them, it's up to the embedder to figure out + * how to interpret the bits contained within, often using some other channel + * to determine the type. + */ +typedef union wasmtime_val_raw { + /// Field for when this val is a WebAssembly `i32` value. + int32_t i32; + /// Field for when this val is a WebAssembly `i64` value. + int64_t i64; + /// Field for when this val is a WebAssembly `f32` value. + float32_t f32; + /// Field for when this val is a WebAssembly `f64` value. + float64_t f64; + /// Field for when this val is a WebAssembly `v128` value. + wasmtime_v128 v128; + /// Field for when this val is a WebAssembly `funcref` value. + /// + /// If this is set to 0 then it's a null funcref, otherwise this must be + /// passed to `wasmtime_func_from_raw` to determine the `wasmtime_func_t`. + size_t funcref; + /// Field for when this val is a WebAssembly `externref` value. + /// + /// If this is set to 0 then it's a null externref, otherwise this must be + /// passed to `wasmtime_externref_from_raw` to determine the + /// `wasmtime_externref_t`. 
+ size_t externref; +} wasmtime_val_raw_t; + /** * \typedef wasmtime_val_t * \brief Convenience alias for #wasmtime_val_t diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index fe875d951f..76ba0759c0 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -8,7 +8,7 @@ use std::mem::{self, MaybeUninit}; use std::panic::{self, AssertUnwindSafe}; use std::ptr; use std::str; -use wasmtime::{AsContextMut, Caller, Extern, Func, Trap, Val}; +use wasmtime::{AsContextMut, Caller, Extern, Func, Trap, Val, ValRaw}; #[derive(Clone)] #[repr(transparent)] @@ -208,6 +208,9 @@ pub type wasmtime_func_callback_t = extern "C" fn( usize, ) -> Option>; +pub type wasmtime_func_unchecked_callback_t = + extern "C" fn(*mut c_void, *mut wasmtime_caller_t, *mut ValRaw) -> Option>; + #[no_mangle] pub unsafe extern "C" fn wasmtime_func_new( store: CStoreContextMut<'_>, @@ -271,6 +274,35 @@ pub(crate) unsafe fn c_callback_to_rust_fn( } } +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_new_unchecked( + store: CStoreContextMut<'_>, + ty: &wasm_functype_t, + callback: wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, + func: &mut Func, +) { + let ty = ty.ty().ty.clone(); + let cb = c_unchecked_callback_to_rust_fn(callback, data, finalizer); + *func = Func::new_unchecked(store, ty, cb); +} + +pub(crate) unsafe fn c_unchecked_callback_to_rust_fn( + callback: wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, +) -> impl Fn(Caller<'_, crate::StoreData>, *mut ValRaw) -> Result<(), Trap> { + let foreign = crate::ForeignData { data, finalizer }; + move |caller, values| { + let mut caller = wasmtime_caller_t { caller }; + match callback(foreign.data, &mut caller, values) { + None => Ok(()), + Some(trap) => Err(trap.trap), + } + } +} + #[no_mangle] pub unsafe extern "C" fn wasmtime_func_call( mut store: CStoreContextMut<'_>, @@ -329,6 +361,18 @@ pub unsafe extern "C" fn wasmtime_func_call( } } +#[no_mangle] +pub 
unsafe extern "C" fn wasmtime_func_call_unchecked( + store: CStoreContextMut<'_>, + func: &Func, + args_and_results: *mut ValRaw, +) -> *mut wasm_trap_t { + match func.call_unchecked(store, args_and_results) { + Ok(()) => ptr::null_mut(), + Err(trap) => Box::into_raw(Box::new(wasm_trap_t::new(trap))), + } +} + #[no_mangle] pub extern "C" fn wasmtime_func_type( store: CStoreContext<'_>, @@ -362,3 +406,17 @@ pub unsafe extern "C" fn wasmtime_caller_export_get( crate::initialize(item, which.into()); true } + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_from_raw( + store: CStoreContextMut<'_>, + raw: usize, + func: &mut Func, +) { + *func = Func::from_raw(store, raw).unwrap(); +} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_to_raw(store: CStoreContextMut<'_>, func: &Func) -> usize { + func.to_raw(store) +} diff --git a/crates/c-api/src/linker.rs b/crates/c-api/src/linker.rs index 1b582b50c7..1ff5624a52 100644 --- a/crates/c-api/src/linker.rs +++ b/crates/c-api/src/linker.rs @@ -1,7 +1,6 @@ -use crate::func::c_callback_to_rust_fn; use crate::{ bad_utf8, handle_result, wasm_engine_t, wasm_functype_t, wasm_trap_t, wasmtime_error_t, - wasmtime_extern_t, wasmtime_func_callback_t, wasmtime_module_t, CStoreContextMut, + wasmtime_extern_t, wasmtime_module_t, CStoreContextMut, }; use std::ffi::c_void; use std::mem::MaybeUninit; @@ -64,17 +63,39 @@ pub unsafe extern "C" fn wasmtime_linker_define_func( name: *const u8, name_len: usize, ty: &wasm_functype_t, - callback: wasmtime_func_callback_t, + callback: crate::wasmtime_func_callback_t, data: *mut c_void, finalizer: Option, ) -> Option> { let ty = ty.ty().ty.clone(); let module = to_str!(module, module_len); let name = to_str!(name, name_len); - let cb = c_callback_to_rust_fn(callback, data, finalizer); + let cb = crate::func::c_callback_to_rust_fn(callback, data, finalizer); handle_result(linker.linker.func_new(module, name, ty, cb), |_linker| ()) } +#[no_mangle] +pub unsafe extern "C" fn 
wasmtime_linker_define_func_unchecked( + linker: &mut wasmtime_linker_t, + module: *const u8, + module_len: usize, + name: *const u8, + name_len: usize, + ty: &wasm_functype_t, + callback: crate::wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, +) -> Option> { + let ty = ty.ty().ty.clone(); + let module = to_str!(module, module_len); + let name = to_str!(name, name_len); + let cb = crate::func::c_unchecked_callback_to_rust_fn(callback, data, finalizer); + handle_result( + linker.linker.func_new_unchecked(module, name, ty, cb), + |_linker| (), + ) +} + #[cfg(feature = "wasi")] #[no_mangle] pub extern "C" fn wasmtime_linker_define_wasi( diff --git a/crates/c-api/src/val.rs b/crates/c-api/src/val.rs index c088390cb2..a98ece8bfe 100644 --- a/crates/c-api/src/val.rs +++ b/crates/c-api/src/val.rs @@ -1,5 +1,8 @@ use crate::r#ref::{ref_to_val, WasmRefInner}; -use crate::{from_valtype, into_valtype, wasm_ref_t, wasm_valkind_t, wasmtime_valkind_t, WASM_I32}; +use crate::{ + from_valtype, into_valtype, wasm_ref_t, wasm_valkind_t, wasmtime_valkind_t, CStoreContextMut, + WASM_I32, +}; use std::ffi::c_void; use std::mem::{self, ManuallyDrop, MaybeUninit}; use std::ptr; @@ -288,3 +291,22 @@ pub extern "C" fn wasmtime_externref_clone(externref: ManuallyDrop) - #[no_mangle] pub extern "C" fn wasmtime_externref_delete(_val: Option) {} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_externref_to_raw( + cx: CStoreContextMut<'_>, + val: Option>, +) -> usize { + match val { + Some(ptr) => ptr.to_raw(cx), + None => 0, + } +} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_externref_from_raw( + _cx: CStoreContextMut<'_>, + val: usize, +) -> Option { + ExternRef::from_raw(val) +} diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index 340dcf9f2c..7d110a87ad 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -344,6 +344,7 @@ impl VMExternRef { /// Nor does this method increment the reference 
count. You must ensure /// that `self` (or some other clone of `self`) stays alive until /// `clone_from_raw` is called. + #[inline] pub fn as_raw(&self) -> *mut u8 { let ptr = self.0.cast::().as_ptr(); ptr diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 0be1abec77..4a2ca4d3a2 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -56,7 +56,7 @@ pub use crate::traphandlers::{ pub use crate::vmcontext::{ VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport, VMGlobalDefinition, VMGlobalImport, VMInterrupts, VMInvokeArgument, VMMemoryDefinition, VMMemoryImport, - VMSharedSignatureIndex, VMTableDefinition, VMTableImport, VMTrampoline, + VMSharedSignatureIndex, VMTableDefinition, VMTableImport, VMTrampoline, ValRaw, }; /// Version number of this crate. diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index 21660c47dd..027f414dac 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -790,10 +790,28 @@ impl VMContext { } } +/// A "raw" and unsafe representation of a WebAssembly value. +/// +/// This is provided for use with the `Func::new_unchecked` and +/// `Func::call_unchecked` APIs. In general it's unlikely you should be using +/// this from Rust, rather using APIs like `Func::wrap` and `TypedFunc::call`. +#[allow(missing_docs)] +#[repr(C)] +#[derive(Copy, Clone)] +pub union ValRaw { + pub i32: i32, + pub i64: i64, + pub f32: u32, + pub f64: u64, + pub v128: u128, + pub funcref: usize, + pub externref: usize, +} + /// Trampoline function pointer type. 
pub type VMTrampoline = unsafe extern "C" fn( *mut VMContext, // callee vmctx *mut VMContext, // caller vmctx *const VMFunctionBody, // function we're actually calling - *mut u128, // space for arguments and return values + *mut ValRaw, // space for arguments and return values ); diff --git a/crates/wasmtime/src/externals.rs b/crates/wasmtime/src/externals.rs index b20a00b9e8..5a5fe3eb9e 100644 --- a/crates/wasmtime/src/externals.rs +++ b/crates/wasmtime/src/externals.rs @@ -1,6 +1,5 @@ use crate::store::{StoreData, StoreOpaque, Stored}; use crate::trampoline::{generate_global_export, generate_table_export}; -use crate::values::{from_checked_anyfunc, into_checked_anyfunc}; use crate::{ AsContext, AsContextMut, ExternRef, ExternType, Func, GlobalType, Instance, Memory, Module, Mutability, TableType, Trap, Val, ValType, @@ -307,7 +306,7 @@ impl Global { .map(|inner| ExternRef { inner }), ), ValType::FuncRef => { - from_checked_anyfunc(definition.as_anyfunc() as *mut _, store.0) + Val::FuncRef(Func::from_raw(store, definition.as_anyfunc() as usize)) } ValType::V128 => Val::V128(*definition.as_u128()), } @@ -438,28 +437,8 @@ impl Table { } fn _new(store: &mut StoreOpaque, ty: TableType, init: Val) -> Result { - if init.ty() != ty.element() { - bail!( - "table initialization value type {:?} does not have expected type {:?}", - init.ty(), - ty.element(), - ); - } let wasmtime_export = generate_table_export(store, &ty)?; - - let init: runtime::TableElement = match ty.element() { - ValType::FuncRef => into_checked_anyfunc(init, store)?.into(), - ValType::ExternRef => init - .externref() - .ok_or_else(|| { - anyhow!("table initialization value does not have expected type `externref`") - })? - .map(|x| x.inner) - .into(), - ty => bail!("unsupported table element type: {:?}", ty), - }; - - // Initialize entries with the init value. 
+ let init = init.into_table_element(store, ty.element())?; unsafe { let table = Table::from_wasmtime_table(wasmtime_export, store); (*table.wasmtime_table(store)) @@ -503,7 +482,10 @@ impl Table { let table = self.wasmtime_table(store); unsafe { match (*table).get(index)? { - runtime::TableElement::FuncRef(f) => Some(from_checked_anyfunc(f, store)), + runtime::TableElement::FuncRef(f) => { + let func = Func::from_caller_checked_anyfunc(store, f); + Some(Val::FuncRef(func)) + } runtime::TableElement::ExternRef(None) => Some(Val::ExternRef(None)), runtime::TableElement::ExternRef(Some(x)) => { Some(Val::ExternRef(Some(ExternRef { inner: x }))) diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 942c6ed426..5615d438c2 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1,7 +1,7 @@ use crate::store::{StoreData, StoreOpaque, Stored}; use crate::{ AsContext, AsContextMut, CallHook, Engine, Extern, FuncType, Instance, InterruptHandle, - StoreContext, StoreContextMut, Trap, Val, ValType, + StoreContext, StoreContextMut, Trap, Val, ValRaw, ValType, }; use anyhow::{bail, Context as _, Result}; use std::error::Error; @@ -306,20 +306,48 @@ impl Func { #[cfg(compiler)] #[cfg_attr(nightlydoc, doc(cfg(feature = "cranelift")))] // see build.rs pub fn new( - mut store: impl AsContextMut, + store: impl AsContextMut, ty: FuncType, func: impl Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap> + Send + Sync + 'static, ) -> Self { - let store = store.as_context_mut().0; - - // part of this unsafety is about matching the `T` to a `Store`, - // which is done through the `AsContextMut` bound above. 
+ let ty_clone = ty.clone(); unsafe { - let host = HostFunc::new(store.engine(), ty, func); - host.into_func(store) + Func::new_unchecked(store, ty, move |caller, values| { + Func::invoke(caller, &ty_clone, values, &func) + }) } } + /// Creates a new [`Func`] with the given arguments, although has fewer + /// runtime checks than [`Func::new`]. + /// + /// This function takes a callback of a different signature than + /// [`Func::new`], instead receiving a raw pointer with a list of [`ValRaw`] + /// structures. These values have no type information associated with them + /// so it's up to the caller to provide a function that will correctly + /// interpret the list of values as those coming from the `ty` specified. + /// + /// If you're calling this from Rust it's recommended to either instead use + /// [`Func::new`] or [`Func::wrap`]. The [`Func::wrap`] API, in particular, + /// is both safer and faster than this API. + /// + /// # Unsafety + /// + /// This function is not safe because it's not known at compile time that + /// the `func` provided correctly interprets the argument types provided to + /// it, or that the results it produces will be of the correct type. + #[cfg(compiler)] + #[cfg_attr(nightlydoc, doc(cfg(feature = "cranelift")))] // see build.rs + pub unsafe fn new_unchecked( + mut store: impl AsContextMut, + ty: FuncType, + func: impl Fn(Caller<'_, T>, *mut ValRaw) -> Result<(), Trap> + Send + Sync + 'static, + ) -> Self { + let store = store.as_context_mut().0; + let host = HostFunc::new_unchecked(store.engine(), ty, func); + host.into_func(store) + } + /// Creates a new host-defined WebAssembly function which, when called, /// will run the asynchronous computation defined by `func` to completion /// and then return the result to WebAssembly. 
@@ -412,9 +440,9 @@ impl Func { pub(crate) unsafe fn from_caller_checked_anyfunc( store: &mut StoreOpaque, - anyfunc: *mut VMCallerCheckedAnyfunc, - ) -> Option { - let anyfunc = NonNull::new(anyfunc)?; + raw: *mut VMCallerCheckedAnyfunc, + ) -> Option { + let anyfunc = NonNull::new(raw)?; debug_assert!(anyfunc.as_ref().type_index != VMSharedSignatureIndex::default()); let export = ExportFunction { anyfunc }; Some(Func::from_wasmtime_function(export, store)) @@ -684,6 +712,84 @@ impl Func { self.call_impl(&mut store.as_context_mut(), params, results) } + /// Invokes this function in an "unchecked" fashion, reading parameters and + /// writing results to `params_and_returns`. + /// + /// This function is the same as [`Func::call`] except that the arguments + /// and results both use a different representation. If possible it's + /// recommended to use [`Func::call`] if safety isn't necessary or to use + /// [`Func::typed`] in conjunction with [`TypedFunc::call`] since that's + /// both safer and faster than this method of invoking a function. + /// + /// Note that if this function takes `externref` arguments then it will + /// **not** automatically GC unlike the [`Func::call`] and + /// [`TypedFunc::call`] functions. This means that if this function is + /// invoked many times with new `ExternRef` values and no other GC happens + /// via any other means then no values will get collected. + /// + /// # Unsafety + /// + /// This function is unsafe because the `params_and_returns` argument is not + /// validated at all. It must uphold invariants such as: + /// + /// * It's a valid pointer to an array + /// * It has enough space to store all parameters + /// * It has enough space to store all results (not at the same time as + /// parameters) + /// * Parameters are initially written to the array and have the correct + /// types and such. + /// * Reference types like `externref` and `funcref` are valid at the + /// time of this call and for the `store` specified. 
+ /// + /// These invariants are all upheld for you with [`Func::call`] and + /// [`TypedFunc::call`]. + pub unsafe fn call_unchecked( + &self, + mut store: impl AsContextMut, + params_and_returns: *mut ValRaw, + ) -> Result<(), Trap> { + let mut store = store.as_context_mut(); + let data = &store.0.store_data()[self.0]; + let trampoline = data.trampoline(); + let anyfunc = data.export().anyfunc; + invoke_wasm_and_catch_traps(&mut store, |callee| { + trampoline( + (*anyfunc.as_ptr()).vmctx, + callee, + (*anyfunc.as_ptr()).func_ptr.as_ptr(), + params_and_returns, + ) + }) + } + + /// Converts the raw representation of a `funcref` into an `Option` + /// + /// This is intended to be used in conjunction with [`Func::new_unchecked`], + /// [`Func::call_unchecked`], and [`ValRaw`] with its `funcref` field. + /// + /// # Unsafety + /// + /// This function is not safe because `raw` is not validated at all. The + /// caller must guarantee that `raw` is owned by the `store` provided and is + /// valid within the `store`. + pub unsafe fn from_raw(mut store: impl AsContextMut, raw: usize) -> Option { + Func::from_caller_checked_anyfunc(store.as_context_mut().0, raw as *mut _) + } + + /// Extracts the raw value of this `Func`, which is owned by `store`. + /// + /// This function returns a value that's suitable for writing into the + /// `funcref` field of the [`ValRaw`] structure. + /// + /// # Unsafety + /// + /// The returned value is only valid for as long as the store is alive and + /// this function is properly rooted within it. Additionally this function + /// should not be liberally used since it's a very low-level knob. + pub unsafe fn to_raw(&self, store: impl AsContext) -> usize { + self.caller_checked_anyfunc(store.as_context().0).as_ptr() as usize + } + /// Invokes this function with the `params` given, returning the results /// asynchronously. 
/// @@ -766,80 +872,46 @@ impl Func { bail!("cross-`Store` values are not currently supported"); } } - let externref_params = ty.as_wasm_func_type().externref_params_count(); - let mut values_vec = write_params(store.0, externref_params, params, results)?; + let values_vec_size = params.len().max(ty.results().len()); + + // Whenever we pass `externref`s from host code to Wasm code, they + // go into the `VMExternRefActivationsTable`. But the table might be + // at capacity already, so check for that. If it is at capacity + // (unlikely) then do a GC to free up space. This is necessary + // because otherwise we would either keep filling up the bump chunk + // and making it larger and larger or we would always take the slow + // path when inserting references into the table. + if ty.as_wasm_func_type().externref_params_count() + > store + .0 + .externref_activations_table() + .bump_capacity_remaining() + { + store.gc(); + } + + // Store the argument values into `values_vec`. + let mut values_vec = store.0.take_wasm_val_raw_storage(); + debug_assert!(values_vec.is_empty()); + values_vec.resize_with(values_vec_size, || ValRaw { i32: 0 }); + for (arg, slot) in params.iter().cloned().zip(&mut values_vec) { + unsafe { + *slot = arg.to_raw(&mut *store); + } + } - // Call the trampoline. 
unsafe { - let data = &store.0.store_data()[self.0]; - let trampoline = data.trampoline(); - let anyfunc = data.export().anyfunc; - invoke_wasm_and_catch_traps(store, |callee| { - trampoline( - (*anyfunc.as_ptr()).vmctx, - callee, - (*anyfunc.as_ptr()).func_ptr.as_ptr(), - values_vec.as_mut_ptr(), - ) - })?; + self.call_unchecked(&mut *store, values_vec.as_mut_ptr())?; } - read_results(store.0, self, values_vec, results); - return Ok(()); - - fn write_params( - store: &mut StoreOpaque, - externref_params: usize, - params: &[Val], - results: &mut [Val], - ) -> Result> { - let values_vec_size = params.len().max(results.len()); - - let mut values_vec = store.take_wasm_u128_storage(); - debug_assert!(values_vec.is_empty()); - values_vec.extend((0..values_vec_size).map(|_| 0)); - - // Whenever we pass `externref`s from host code to Wasm code, they - // go into the `VMExternRefActivationsTable`. But the table might be - // at capacity already, so check for that. If it is at capacity - // (unlikely) then do a GC to free up space. This is necessary - // because otherwise we would either keep filling up the bump chunk - // and making it larger and larger or we would always take the slow - // path when inserting references into the table. - if externref_params - > store - .externref_activations_table() - .bump_capacity_remaining() - { - store.gc(); - } - - // Store the argument values into `values_vec`. 
- for (arg, slot) in params.iter().zip(&mut values_vec) { - unsafe { - arg.write_value_without_gc(store, slot); - } - } - - Ok(values_vec) - } - - fn read_results( - store: &mut StoreOpaque, - func: &Func, - mut values_vec: Vec, - results: &mut [Val], - ) { - for (i, (ptr, dst)) in values_vec.iter().zip(results).enumerate() { - let ty = store[func.0].ty.results().nth(i).unwrap(); - unsafe { - *dst = Val::read_value_from(store, ptr, ty); - } - } - values_vec.truncate(0); - store.save_wasm_u128_storage(values_vec); + for ((i, slot), val) in results.iter_mut().enumerate().zip(&values_vec) { + let ty = store[self.0].ty.results().nth(i).unwrap(); + *slot = unsafe { Val::from_raw(&mut *store, *val, ty) }; } + values_vec.truncate(0); + store.0.save_wasm_val_raw_storage(values_vec); + Ok(()) } #[inline] @@ -890,7 +962,7 @@ impl Func { fn invoke( mut caller: Caller<'_, T>, ty: &FuncType, - values_vec: *mut u128, + values_vec: *mut ValRaw, func: &dyn Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap>, ) -> Result<(), Trap> { caller.store.0.call_hook(CallHook::CallingHost)?; @@ -909,10 +981,7 @@ impl Func { let nparams = ty.params().len(); val_vec.reserve(nparams + ty.results().len()); for (i, ty) in ty.params().enumerate() { - unsafe { - let val = Val::read_value_from(caller.store.0, values_vec.add(i), ty); - val_vec.push(val); - } + val_vec.push(unsafe { Val::from_raw(&mut caller.store, *values_vec.add(i), ty) }) } val_vec.extend((0..ty.results().len()).map(|_| Val::null())); @@ -946,7 +1015,7 @@ impl Func { )); } unsafe { - ret.write_value_without_gc(caller.store.0, values_vec.add(i)); + *values_vec.add(i) = ret.to_raw(&mut caller.store); } } @@ -1276,7 +1345,7 @@ pub unsafe trait WasmRet { fn func_type(params: impl Iterator) -> FuncType; #[doc(hidden)] - unsafe fn wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi); + unsafe fn wrap_trampoline(ptr: *mut ValRaw, f: impl FnOnce(Self::Retptr) -> Self::Abi); // Utilities used to convert an 
instance of this type to a `Result` // explicitly, used when wrapping async functions which always bottom-out @@ -1313,7 +1382,7 @@ where FuncType::new(params, Some(::valtype())) } - unsafe fn wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + unsafe fn wrap_trampoline(ptr: *mut ValRaw, f: impl FnOnce(Self::Retptr) -> Self::Abi) { *ptr.cast::() = f(()); } @@ -1353,7 +1422,7 @@ where T::func_type(params) } - unsafe fn wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + unsafe fn wrap_trampoline(ptr: *mut ValRaw, f: impl FnOnce(Self::Retptr) -> Self::Abi) { T::wrap_trampoline(ptr, f) } @@ -1399,7 +1468,7 @@ macro_rules! impl_wasm_host_results { } #[allow(unused_assignments)] - unsafe fn wrap_trampoline(mut _ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + unsafe fn wrap_trampoline(mut _ptr: *mut ValRaw, f: impl FnOnce(Self::Retptr) -> Self::Abi) { let ($($t,)*) = <($($t::Abi,)*) as HostAbi>::call(f); $( *_ptr.cast() = $t; @@ -1866,7 +1935,7 @@ macro_rules! 
impl_into_func { callee_vmctx: *mut VMContext, caller_vmctx: *mut VMContext, ptr: *const VMFunctionBody, - args: *mut u128, + args: *mut ValRaw, ) where $($args: WasmTy,)* @@ -1956,14 +2025,23 @@ impl HostFunc { func: impl Fn(Caller<'_, T>, &[Val], &mut [Val]) -> Result<(), Trap> + Send + Sync + 'static, ) -> Self { let ty_clone = ty.clone(); - - // Create a trampoline that converts raw u128 values to `Val` - let func = move |caller_vmctx, values_vec: *mut u128| unsafe { - Caller::with(caller_vmctx, |caller| { - Func::invoke(caller, &ty_clone, values_vec, &func) + unsafe { + HostFunc::new_unchecked(engine, ty, move |caller, values| { + Func::invoke(caller, &ty_clone, values, &func) }) - }; + } + } + /// Analog of [`Func::new_unchecked`] + #[cfg(compiler)] + pub unsafe fn new_unchecked( + engine: &Engine, + ty: FuncType, + func: impl Fn(Caller<'_, T>, *mut ValRaw) -> Result<(), Trap> + Send + Sync + 'static, + ) -> Self { + let func = move |caller_vmctx, values: *mut ValRaw| unsafe { + Caller::::with(caller_vmctx, |caller| func(caller, values)) + }; let (instance, trampoline) = crate::trampoline::create_function(&ty, func, engine) .expect("failed to create function"); HostFunc::_new(engine, instance, trampoline) diff --git a/crates/wasmtime/src/linker.rs b/crates/wasmtime/src/linker.rs index a84b497959..9eb91506eb 100644 --- a/crates/wasmtime/src/linker.rs +++ b/crates/wasmtime/src/linker.rs @@ -3,7 +3,7 @@ use crate::instance::{InstanceData, InstancePre}; use crate::store::StoreOpaque; use crate::{ AsContextMut, Caller, Engine, Extern, ExternType, Func, FuncType, ImportType, Instance, - IntoFunc, Module, StoreContextMut, Trap, Val, + IntoFunc, Module, StoreContextMut, Trap, Val, ValRaw, }; use anyhow::{anyhow, bail, Context, Error, Result}; use log::warn; @@ -315,6 +315,24 @@ impl Linker { Ok(self) } + /// Creates a [`Func::new_unchecked`]-style function named in this linker. + /// + /// For more information see [`Linker::func_wrap`]. 
+ #[cfg(compiler)] + #[cfg_attr(nightlydoc, doc(cfg(feature = "cranelift")))] // see build.rs + pub unsafe fn func_new_unchecked( + &mut self, + module: &str, + name: &str, + ty: FuncType, + func: impl Fn(Caller<'_, T>, *mut ValRaw) -> Result<(), Trap> + Send + Sync + 'static, + ) -> Result<&mut Self> { + let func = HostFunc::new_unchecked(&self.engine, ty, func); + let key = self.import_key(module, Some(name)); + self.insert(key, Definition::HostFunc(Arc::new(func)))?; + Ok(self) + } + /// Creates a [`Func::new_async`]-style function named in this linker. /// /// For more information see [`Linker::func_wrap`]. diff --git a/crates/wasmtime/src/ref.rs b/crates/wasmtime/src/ref.rs index 9af6057a43..f999b96d80 100644 --- a/crates/wasmtime/src/ref.rs +++ b/crates/wasmtime/src/ref.rs @@ -1,5 +1,6 @@ #![allow(missing_docs)] +use crate::AsContextMut; use std::any::Any; use wasmtime_runtime::VMExternRef; @@ -40,4 +41,62 @@ impl ExternRef { pub fn ptr_eq(&self, other: &ExternRef) -> bool { VMExternRef::eq(&self.inner, &other.inner) } + + /// Creates a new strongly-owned [`ExternRef`] from the raw value provided. + /// + /// This is intended to be used in conjunction with [`Func::new_unchecked`], + /// [`Func::call_unchecked`], and [`ValRaw`] with its `externref` field. + /// + /// This function assumes that `raw` is an externref value which is + /// currently rooted within the [`Store`]. + /// + /// # Unsafety + /// + /// This function is particularly `unsafe` because `raw` not only must be a + /// valid externref value produced prior by `to_raw` but it must also be + /// correctly rooted within the store. When arguments are provided to a + /// callback with [`Func::new_unchecked`], for example, or returned via + /// [`Func::call_unchecked`], if a GC is performed within the store then + /// floating externref values are not rooted and will be GC'd, meaning that + /// this function will no longer be safe to call with the values cleaned up. 
+ /// This function must be invoked *before* possible GC operations can happen + /// (such as calling wasm). + /// + /// When in doubt try to not use this. Instead use the safe Rust APIs of + /// [`TypedFunc`] and friends. + /// + /// [`Func::call_unchecked`]: crate::Func::call_unchecked + /// [`Func::new_unchecked`]: crate::Func::new_unchecked + /// [`Store`]: crate::Store + /// [`TypedFunc`]: crate::TypedFunc + /// [`ValRaw`]: crate::ValRaw + pub unsafe fn from_raw(raw: usize) -> Option { + let raw = raw as *mut u8; + if raw.is_null() { + None + } else { + Some(ExternRef { + inner: VMExternRef::clone_from_raw(raw), + }) + } + } + + /// Converts this [`ExternRef`] to a raw value suitable to store within a + /// [`ValRaw`]. + /// + /// # Unsafety + /// + /// Produces a raw value which is only safe to pass into a store if a GC + /// doesn't happen between when the value is produce and when it's passed + /// into the store. + /// + /// [`ValRaw`]: crate::ValRaw + pub unsafe fn to_raw(&self, mut store: impl AsContextMut) -> usize { + let externref_ptr = self.inner.as_raw(); + store + .as_context_mut() + .0 + .insert_vmexternref_without_gc(self.inner.clone()); + externref_ptr as usize + } } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index e797984bc7..365574bd11 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -76,7 +76,7 @@ //! contents of `StoreOpaque`. This is an invariant that we, as the authors of //! `wasmtime`, must uphold for the public interface to be safe. -use crate::{module::ModuleRegistry, Engine, Module, Trap, Val}; +use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw}; use anyhow::{bail, Result}; use std::cell::UnsafeCell; use std::collections::HashMap; @@ -276,7 +276,7 @@ pub struct StoreOpaque { hostcall_val_storage: Vec, /// Same as `hostcall_val_storage`, but for the direction of the host /// calling wasm. 
- wasm_u128_storage: Vec, + wasm_val_raw_storage: Vec, } #[cfg(feature = "async")] @@ -433,7 +433,7 @@ impl Store { store_data: StoreData::new(), default_callee, hostcall_val_storage: Vec::new(), - wasm_u128_storage: Vec::new(), + wasm_val_raw_storage: Vec::new(), }, limiter: None, call_hook: None, @@ -1182,16 +1182,16 @@ impl StoreOpaque { /// Same as `take_hostcall_val_storage`, but for the direction of the host /// calling wasm. #[inline] - pub fn take_wasm_u128_storage(&mut self) -> Vec { - mem::take(&mut self.wasm_u128_storage) + pub fn take_wasm_val_raw_storage(&mut self) -> Vec { + mem::take(&mut self.wasm_val_raw_storage) } /// Same as `save_hostcall_val_storage`, but for the direction of the host /// calling wasm. #[inline] - pub fn save_wasm_u128_storage(&mut self, storage: Vec) { - if storage.capacity() > self.wasm_u128_storage.capacity() { - self.wasm_u128_storage = storage; + pub fn save_wasm_val_raw_storage(&mut self, storage: Vec) { + if storage.capacity() > self.wasm_val_raw_storage.capacity() { + self.wasm_val_raw_storage = storage; } } } diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index 3c7e8c84c6..5c2e2a1b84 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -1,6 +1,6 @@ //! Support for a calling of an imported function. -use crate::{Engine, FuncType, Trap}; +use crate::{Engine, FuncType, Trap, ValRaw}; use anyhow::Result; use std::any::Any; use std::panic::{self, AssertUnwindSafe}; @@ -21,9 +21,9 @@ struct TrampolineState { unsafe extern "C" fn stub_fn( vmctx: *mut VMContext, caller_vmctx: *mut VMContext, - values_vec: *mut u128, + values_vec: *mut ValRaw, ) where - F: Fn(*mut VMContext, *mut u128) -> Result<(), Trap> + 'static, + F: Fn(*mut VMContext, *mut ValRaw) -> Result<(), Trap> + 'static, { // Here we are careful to use `catch_unwind` to ensure Rust panics don't // unwind past us. 
The primary reason for this is that Rust considers it UB @@ -72,7 +72,7 @@ pub fn create_function( engine: &Engine, ) -> Result<(InstanceHandle, VMTrampoline)> where - F: Fn(*mut VMContext, *mut u128) -> Result<(), Trap> + Send + Sync + 'static, + F: Fn(*mut VMContext, *mut ValRaw) -> Result<(), Trap> + Send + Sync + 'static, { let mut obj = engine.compiler().object()?; let (t1, t2) = engine.compiler().emit_trampoline_obj( diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs index c92e511e20..c9b10a8190 100644 --- a/crates/wasmtime/src/values.rs +++ b/crates/wasmtime/src/values.rs @@ -1,9 +1,11 @@ use crate::r#ref::ExternRef; use crate::store::StoreOpaque; -use crate::{Func, ValType}; +use crate::{AsContextMut, Func, ValType}; use anyhow::{bail, Result}; use std::ptr; -use wasmtime_runtime::{self as runtime, VMExternRef}; +use wasmtime_runtime::TableElement; + +pub use wasmtime_runtime::ValRaw; /// Possible runtime values that a WebAssembly module can either consume or /// produce. @@ -93,55 +95,52 @@ impl Val { } } - pub(crate) unsafe fn write_value_without_gc(&self, store: &mut StoreOpaque, p: *mut u128) { - match *self { - Val::I32(i) => ptr::write(p as *mut i32, i), - Val::I64(i) => ptr::write(p as *mut i64, i), - Val::F32(u) => ptr::write(p as *mut u32, u), - Val::F64(u) => ptr::write(p as *mut u64, u), - Val::V128(b) => ptr::write(p as *mut u128, b), - Val::ExternRef(None) => ptr::write(p, 0), - Val::ExternRef(Some(ref x)) => { - let externref_ptr = x.inner.as_raw(); - store.insert_vmexternref_without_gc(x.clone().inner); - ptr::write(p as *mut *mut u8, externref_ptr) + /// Convenience method to convert this [`Val`] into a [`ValRaw`]. + /// + /// # Unsafety + /// + /// This method is unsafe for the reasons that [`ExternRef::to_raw`] and + /// [`Func::to_raw`] are unsafe. 
+ pub unsafe fn to_raw(&self, store: impl AsContextMut) -> ValRaw { + match self { + Val::I32(i) => ValRaw { i32: *i }, + Val::I64(i) => ValRaw { i64: *i }, + Val::F32(u) => ValRaw { f32: *u }, + Val::F64(u) => ValRaw { f64: *u }, + Val::V128(b) => ValRaw { v128: *b }, + Val::ExternRef(e) => { + let externref = match e { + Some(e) => e.to_raw(store), + None => 0, + }; + ValRaw { externref } + } + Val::FuncRef(f) => { + let funcref = match f { + Some(f) => f.to_raw(store), + None => 0, + }; + ValRaw { funcref } } - Val::FuncRef(f) => ptr::write( - p as *mut *mut runtime::VMCallerCheckedAnyfunc, - if let Some(f) = f { - f.caller_checked_anyfunc(store).as_ptr() - } else { - ptr::null_mut() - }, - ), } } - pub(crate) unsafe fn read_value_from( - store: &mut StoreOpaque, - p: *const u128, - ty: ValType, - ) -> Val { + /// Convenience method to convert a [`ValRaw`] into a [`Val`]. + /// + /// # Unsafety + /// + /// This method is unsafe for the reasons that [`ExternRef::from_raw`] and + /// [`Func::from_raw`] are unsafe. Additionaly there's no guarantee + /// otherwise that `raw` should have the type `ty` specified. 
+ pub unsafe fn from_raw(store: impl AsContextMut, raw: ValRaw, ty: ValType) -> Val { match ty { - ValType::I32 => Val::I32(ptr::read(p as *const i32)), - ValType::I64 => Val::I64(ptr::read(p as *const i64)), - ValType::F32 => Val::F32(ptr::read(p as *const u32)), - ValType::F64 => Val::F64(ptr::read(p as *const u64)), - ValType::V128 => Val::V128(ptr::read(p as *const u128)), - ValType::ExternRef => { - let raw = ptr::read(p as *const *mut u8); - if raw.is_null() { - Val::ExternRef(None) - } else { - Val::ExternRef(Some(ExternRef { - inner: VMExternRef::clone_from_raw(raw), - })) - } - } - ValType::FuncRef => { - let func = ptr::read(p as *const *mut runtime::VMCallerCheckedAnyfunc); - from_checked_anyfunc(func, store) - } + ValType::I32 => Val::I32(raw.i32), + ValType::I64 => Val::I64(raw.i64), + ValType::F32 => Val::F32(raw.f32), + ValType::F64 => Val::F64(raw.f64), + ValType::V128 => Val::V128(raw.v128), + ValType::ExternRef => Val::ExternRef(ExternRef::from_raw(raw.externref)), + ValType::FuncRef => Val::FuncRef(Func::from_raw(store, raw.funcref)), } } @@ -189,25 +188,21 @@ impl Val { self, store: &mut StoreOpaque, ty: ValType, - ) -> Result { + ) -> Result { match (self, ty) { (Val::FuncRef(Some(f)), ValType::FuncRef) => { if !f.comes_from_same_store(store) { bail!("cross-`Store` values are not supported in tables"); } - Ok(runtime::TableElement::FuncRef( + Ok(TableElement::FuncRef( f.caller_checked_anyfunc(store).as_ptr(), )) } - (Val::FuncRef(None), ValType::FuncRef) => { - Ok(runtime::TableElement::FuncRef(ptr::null_mut())) - } + (Val::FuncRef(None), ValType::FuncRef) => Ok(TableElement::FuncRef(ptr::null_mut())), (Val::ExternRef(Some(x)), ValType::ExternRef) => { - Ok(runtime::TableElement::ExternRef(Some(x.inner))) - } - (Val::ExternRef(None), ValType::ExternRef) => { - Ok(runtime::TableElement::ExternRef(None)) + Ok(TableElement::ExternRef(Some(x.inner))) } + (Val::ExternRef(None), ValType::ExternRef) => Ok(TableElement::ExternRef(None)), _ => 
bail!("value does not match table element type"), } } @@ -293,24 +288,3 @@ impl From for Val { Val::V128(val) } } - -pub(crate) fn into_checked_anyfunc( - val: Val, - store: &mut StoreOpaque, -) -> Result<*mut wasmtime_runtime::VMCallerCheckedAnyfunc> { - if !val.comes_from_same_store(store) { - bail!("cross-`Store` values are not supported"); - } - Ok(match val { - Val::FuncRef(None) => ptr::null_mut(), - Val::FuncRef(Some(f)) => f.caller_checked_anyfunc(store).as_ptr(), - _ => bail!("val is not funcref"), - }) -} - -pub(crate) unsafe fn from_checked_anyfunc( - anyfunc: *mut wasmtime_runtime::VMCallerCheckedAnyfunc, - store: &mut StoreOpaque, -) -> Val { - Val::FuncRef(Func::from_caller_checked_anyfunc(store, anyfunc)) -} From 98831fe4e246f7ea00ce10d337cef285841c5e17 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 24 Sep 2021 15:07:16 -0500 Subject: [PATCH 40/93] Update zeroize_derive to fix a rustsec warning (#3389) Should hopefully appease CI --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e791a4ced..6d7b99926c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4094,9 +4094,9 @@ dependencies = [ [[package]] name = "zeroize_derive" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2c1e130bebaeab2f23886bf9acbaca14b092408c452543c857f66399cd6dab1" +checksum = "bdff2024a851a322b08f179173ae2ba620445aef1e838f0c196820eade4ae0c7" dependencies = [ "proc-macro2", "quote", From 1ee2af00982f2d7d3dc815434e1641446a9599c6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 27 Sep 2021 12:27:19 -0500 Subject: [PATCH 41/93] Remove the lightbeam backend (#3390) This commit removes the Lightbeam backend from Wasmtime as per [RFC 14]. This backend hasn't received maintenance in quite some time, and as [RFC 14] indicates this doesn't meet the threshold for keeping the code in-tree, so this commit removes it. 
A fast "baseline" compiler may still be added in the future. The addition of such a backend should be in line with [RFC 14], though, with the principles we now have for stable releases of Wasmtime. I'll close out Lightbeam-related issues once this is merged. [RFC 14]: https://github.com/bytecodealliance/rfcs/pull/14 --- .github/workflows/main.yml | 13 +- Cargo.lock | 145 - Cargo.toml | 4 +- README.md | 4 +- build.rs | 14 +- ci/run-tests.sh | 1 - .../codegen/src/isa/x64/inst/emit_tests.rs | 4 +- crates/c-api/Cargo.toml | 1 - crates/c-api/include/wasmtime/config.h | 12 +- crates/c-api/src/config.rs | 2 - crates/fuzzing/src/generators.rs | 12 +- crates/fuzzing/src/oracles.rs | 2 +- crates/lightbeam/Cargo.toml | 36 - crates/lightbeam/LICENSE | 220 - crates/lightbeam/README.md | 168 - crates/lightbeam/examples/test.rs | 16 - crates/lightbeam/src/backend.rs | 6134 ----------------- crates/lightbeam/src/benches.rs | 112 - crates/lightbeam/src/disassemble.rs | 57 - crates/lightbeam/src/error.rs | 29 - crates/lightbeam/src/function_body.rs | 902 --- crates/lightbeam/src/lib.rs | 24 - crates/lightbeam/src/microwasm.rs | 2386 ------- crates/lightbeam/src/module.rs | 570 -- crates/lightbeam/src/translate_sections.rs | 124 - crates/lightbeam/tests/quickchecks.rs | 654 -- crates/lightbeam/tests/wrongs.rs | 45 - crates/lightbeam/wasmtime/Cargo.toml | 22 - crates/lightbeam/wasmtime/LICENSE | 220 - crates/lightbeam/wasmtime/README.md | 4 - crates/lightbeam/wasmtime/src/lib.rs | 370 - crates/wasmtime/Cargo.toml | 3 - crates/wasmtime/src/config.rs | 15 +- crates/wasmtime/src/module/serialization.rs | 20 - crates/wast/src/wast.rs | 4 - docs/contributing-architecture.md | 8 +- fuzz/fuzz_targets/compile.rs | 5 - scripts/publish.rs | 5 +- src/lib.rs | 25 +- 39 files changed, 20 insertions(+), 12372 deletions(-) delete mode 100644 crates/lightbeam/Cargo.toml delete mode 100644 crates/lightbeam/LICENSE delete mode 100644 crates/lightbeam/README.md delete mode 100644 
crates/lightbeam/examples/test.rs delete mode 100644 crates/lightbeam/src/backend.rs delete mode 100644 crates/lightbeam/src/benches.rs delete mode 100644 crates/lightbeam/src/disassemble.rs delete mode 100644 crates/lightbeam/src/error.rs delete mode 100644 crates/lightbeam/src/function_body.rs delete mode 100644 crates/lightbeam/src/lib.rs delete mode 100644 crates/lightbeam/src/microwasm.rs delete mode 100644 crates/lightbeam/src/module.rs delete mode 100644 crates/lightbeam/src/translate_sections.rs delete mode 100644 crates/lightbeam/tests/quickchecks.rs delete mode 100644 crates/lightbeam/tests/wrongs.rs delete mode 100644 crates/lightbeam/wasmtime/Cargo.toml delete mode 100644 crates/lightbeam/wasmtime/LICENSE delete mode 100644 crates/lightbeam/wasmtime/README.md delete mode 100644 crates/lightbeam/wasmtime/src/lib.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e5a0ea5ca3..500c743f97 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -131,7 +131,6 @@ jobs: # Check some feature combinations of the `wasmtime` crate - run: cargo check -p wasmtime --no-default-features - run: cargo check -p wasmtime --no-default-features --features wat - - run: cargo check -p wasmtime --no-default-features --features lightbeam - run: cargo check -p wasmtime --no-default-features --features jitdump - run: cargo check -p wasmtime --no-default-features --features vtune - run: cargo check -p wasmtime --no-default-features --features cache @@ -307,7 +306,7 @@ jobs: RUST_BACKTRACE: 1 if: matrix.target == '' - # Build and test all features except for lightbeam + # Build and test all features - run: ./ci/run-tests.sh --locked env: RUST_BACKTRACE: 1 @@ -329,16 +328,6 @@ jobs: env: RUST_BACKTRACE: 1 - # Build and test lightbeam. Note that - # Lightbeam tests fail right now, but we don't want to block on that. 
- - run: cargo build --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' - - run: cargo test --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' && matrix.target != 's390x-unknown-linux-gnu' - continue-on-error: true - env: - RUST_BACKTRACE: 1 - # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. test_x86: name: Test old x86 backend diff --git a/Cargo.lock b/Cargo.lock index 6d7b99926c..a2e80da3ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,12 +125,6 @@ dependencies = [ "derive_arbitrary", ] -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "async-trait" version = "0.1.50" @@ -526,12 +520,6 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f6b64db6932c7e49332728e3a6bd82c6b7e16016607d20923b537c3bc4c0d5f" -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" - [[package]] name = "cpp_demangle" version = "0.3.2" @@ -990,29 +978,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_more" -version = "0.99.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc7b9cef1e351660e5443924e4f43ab25fbbed3e9a5f052df3677deb4d6b320" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_utils" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532b4c15dccee12c7044f1fcad956e98410860b22231e44a3b827464797ca7bf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "digest" version = "0.9.0" @@ -1059,32 +1024,6 @@ version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" -[[package]] -name = "dynasm" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdc2d9a5e44da60059bd38db2d05cbb478619541b8c79890547861ec1e3194f0" -dependencies = [ - "bitflags", - "byteorder", - "lazy_static", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "dynasmrt" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42276e3f205fe63887cca255aa9a65a63fb72764c30b9a6252a7c7e46994f689" -dependencies = [ - "byteorder", - "dynasm", - "memmap2", -] - [[package]] name = "ecdsa" version = "0.10.2" @@ -1494,17 +1433,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" -[[package]] -name = "iter-enum" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f947f0d9df7e69c4df60a950c0a83741455bb9ebd8fd9b5a87994dda4dbb005" -dependencies = [ - "derive_utils", - "quote", - "syn", -] - [[package]] name = "itertools" version = "0.9.0" @@ -1622,30 +1550,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a" -[[package]] -name = "lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "arrayvec", - "capstone", - "cranelift-codegen", - "derive_more", - "dynasm", - "dynasmrt", - "iter-enum", - "itertools 0.10.0", - "lazy_static", - "memoffset", - "more-asserts", - "quickcheck", - "smallvec", - "thiserror", - "typemap", - "wasmparser", - "wat", -] - [[package]] name = "linux-raw-sys" version = "0.0.24" @@ -2342,17 +2246,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger 0.8.3", - "log", - "rand 0.8.3", -] - [[package]] name = "quote" version = "1.0.9" @@ -3178,21 +3071,6 @@ dependencies = [ "tracing-serde", ] -[[package]] -name = "traitobject" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079" - -[[package]] -name = "typemap" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "653be63c80a3296da5551e1bfd2cca35227e13cdd08c6668903ae2f4f77aa1f6" -dependencies = [ - "unsafe-any", -] - [[package]] name = "typenum" version = "1.13.0" @@ -3227,15 +3105,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "unsafe-any" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30360d7979f5e9c6e6cea48af192ea8fab4afb3cf72597154b8f08935bc9c7f" -dependencies = [ - "traitobject", -] - [[package]] name = "unsafe-io" version = "0.9.1" @@ -3797,20 +3666,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "wasmtime-lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "cranelift-codegen", - "gimli", - "lightbeam", - "object", - "target-lexicon", - "wasmparser", - "wasmtime-environ", -] - [[package]] name = "wasmtime-runtime" version = "0.30.0" diff --git a/Cargo.toml b/Cargo.toml index bb7806a973..08e0c0eb72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,7 +75,6 @@ members = [ "cranelift", "crates/bench-api", "crates/c-api", - "crates/lightbeam/wasmtime", "crates/misc/run-examples", "examples/fib-debug/wasm", "examples/wasi/wasm", @@ -83,13 +82,12 @@ members = [ "fuzz", ] exclude = [ - 'crates/wasi-common/WASI/tools/witx-cli', + 
'crates/wasi-common/WASI/tools/witx-cli', 'docs/rust_wasi_markdown_parser' ] [features] default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", "wasi-nn"] -lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] wasi-crypto = ["wasmtime-wasi-crypto"] diff --git a/README.md b/README.md index 302883cd26..f5151c2acd 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,8 @@ Hello, world! quickly generate high-quality machine code at runtime. * **Configurable**. Whether you need to precompile your wasm ahead of time, - generate code blazingly fast with Lightbeam, or interpret it at runtime, - Wasmtime has you covered for all your wasm-executing needs. + or interpret it at runtime, Wasmtime has you covered for all your + wasm-executing needs. * **WASI**. Wasmtime supports a rich set of APIs for interacting with the host environment through the [WASI standard](https://wasi.dev). diff --git a/build.rs b/build.rs index b3dc1fd8ea..06f0669cdf 100644 --- a/build.rs +++ b/build.rs @@ -17,11 +17,7 @@ fn main() -> anyhow::Result<()> { ); let mut out = String::new(); - for strategy in &[ - "Cranelift", - #[cfg(feature = "lightbeam")] - "Lightbeam", - ] { + for strategy in &["Cranelift"] { writeln!(out, "#[cfg(test)]")?; writeln!(out, "#[allow(non_snake_case)]")?; writeln!(out, "mod {} {{", strategy)?; @@ -185,14 +181,6 @@ fn write_testsuite_tests( /// Ignore tests that aren't supported yet. fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { - #[cfg(feature = "lightbeam")] - "Lightbeam" => match (testsuite, testname) { - ("simd", _) => return true, - ("multi_value", _) => return true, - ("reference_types", _) => return true, - ("bulk_memory_operations", _) => return true, - _ => (), - }, "Cranelift" => match (testsuite, testname) { // Skip all reference types tests on the old backend. 
The modern // implementation of reference types uses atomic instructions diff --git a/ci/run-tests.sh b/ci/run-tests.sh index 15b2cabc03..7bd661ffe5 100755 --- a/ci/run-tests.sh +++ b/ci/run-tests.sh @@ -3,7 +3,6 @@ cargo test \ --features "test-programs/test_programs" \ --workspace \ - --exclude '*lightbeam*' \ --exclude 'wasmtime-wasi-*' \ --exclude 'peepmatic*' \ --exclude wasi-crypto \ diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index c02593eab2..1a81191141 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -8,8 +8,8 @@ //! //! cd to the top of your wasmtime tree, then: //! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ -//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ -//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ +//! --features experimental_x64 --all --exclude peepmatic \ +//! --exclude peepmatic-automata --exclude peepmatic-fuzzing \ //! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit use super::*; diff --git a/crates/c-api/Cargo.toml b/crates/c-api/Cargo.toml index 39e99eb183..97a0d759ae 100644 --- a/crates/c-api/Cargo.toml +++ b/crates/c-api/Cargo.toml @@ -34,7 +34,6 @@ cap-std = { version = "0.19.1", optional = true } [features] default = ['jitdump', 'wat', 'wasi', 'cache'] -lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] cache = ["wasmtime/cache"] wasi = ['wasi-common', 'wasi-cap-std-sync', 'wasmtime-wasi', 'cap-std'] diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 0150ee01a5..9f90059b09 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -26,19 +26,13 @@ typedef uint8_t wasmtime_strategy_t; * The default value is #WASMTIME_STRATEGY_AUTO. 
*/ enum wasmtime_strategy_enum { // Strategy - /// Wasmtime will automatically determine whether to use Cranelift or - /// Lightbeam, and currently it will always pick Cranelift. This default may - /// change over time though. + /// Automatically picks the compilation backend, currently always defaulting + /// to Cranelift. WASMTIME_STRATEGY_AUTO, - /// Indicates that Cranelift will unconditionally use Cranelift to compile + /// Indicates that Wasmtime will unconditionally use Cranelift to compile /// WebAssembly code. WASMTIME_STRATEGY_CRANELIFT, - - /// Indicates that Cranelift will unconditionally use Lightbeam to compile - /// WebAssembly code. Note that Lightbeam isn't always enabled at compile - /// time, and if that's the case an error will be returned. - WASMTIME_STRATEGY_LIGHTBEAM, }; /** diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 6386ca6c91..64c3a88131 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -20,7 +20,6 @@ wasmtime_c_api_macros::declare_own!(wasm_config_t); pub enum wasmtime_strategy_t { WASMTIME_STRATEGY_AUTO, WASMTIME_STRATEGY_CRANELIFT, - WASMTIME_STRATEGY_LIGHTBEAM, } #[repr(u8)] @@ -114,7 +113,6 @@ pub extern "C" fn wasmtime_config_strategy_set( let result = c.config.strategy(match strategy { WASMTIME_STRATEGY_AUTO => Strategy::Auto, WASMTIME_STRATEGY_CRANELIFT => Strategy::Cranelift, - WASMTIME_STRATEGY_LIGHTBEAM => Strategy::Lightbeam, }); handle_result(result, |_cfg| {}) } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 242a5bc3bb..10eaa86844 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -18,7 +18,6 @@ use arbitrary::{Arbitrary, Unstructured}; /// testing between. 
#[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] pub struct DifferentialConfig { - strategy: DifferentialStrategy, opt_level: OptLevel, force_jump_veneers: bool, } @@ -26,10 +25,7 @@ pub struct DifferentialConfig { impl DifferentialConfig { /// Convert this differential fuzzing config into a `wasmtime::Config`. pub fn to_wasmtime_config(&self) -> anyhow::Result<wasmtime::Config> { - let mut config = crate::fuzz_default_config(match self.strategy { - DifferentialStrategy::Cranelift => wasmtime::Strategy::Cranelift, - DifferentialStrategy::Lightbeam => wasmtime::Strategy::Lightbeam, - })?; + let mut config = crate::fuzz_default_config(wasmtime::Strategy::Cranelift)?; config.cranelift_opt_level(self.opt_level.to_wasmtime()); if self.force_jump_veneers { unsafe { @@ -40,12 +36,6 @@ impl DifferentialConfig { } } -#[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] -enum DifferentialStrategy { - Cranelift, - Lightbeam, -} - #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] enum OptLevel { None, diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index 467aa6955e..ce41fd1410 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -253,7 +253,7 @@ pub fn differential_execution( let configs: Vec<_> = match configs.iter().map(|c| c.to_wasmtime_config()).collect() { Ok(cs) => cs, // If the config is trying to use something that was turned off at - // compile time, eg lightbeam, just continue to the next fuzz input. + // compile time just continue to the next fuzz input.
Err(_) => return, }; diff --git a/crates/lightbeam/Cargo.toml b/crates/lightbeam/Cargo.toml deleted file mode 100644 index a974df5565..0000000000 --- a/crates/lightbeam/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -[package] -name = "lightbeam" -version = "0.30.0" -authors = ["The Lightbeam Project Developers"] -description = "An optimising one-pass streaming compiler for WebAssembly" -license = "Apache-2.0 WITH LLVM-exception" -repository = "https://github.com/bytecodealliance/wasmtime" -readme = "README.md" -categories = ["wasm"] -keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] -edition = "2018" - -[dependencies] -arrayvec = "0.5" -capstone = "0.9.0" -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.77.0" } -derive_more = "0.99" -dynasm = "1.0.0" -dynasmrt = "1.0.0" -iter-enum = "1" -itertools = "0.10.0" -memoffset = "0.6.0" -more-asserts = "0.2.1" -smallvec = "1.6.1" -thiserror = "1.0.9" -typemap = "0.3" -wasmparser = "0.80" - -[dev-dependencies] -lazy_static = "1.2" -wat = "1.0.37" -quickcheck = "1.0.0" -anyhow = "1.0" - -[badges] -maintenance = { status = "experimental" } diff --git a/crates/lightbeam/LICENSE b/crates/lightbeam/LICENSE deleted file mode 100644 index f9d81955f4..0000000000 --- a/crates/lightbeam/LICENSE +++ /dev/null @@ -1,220 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ---- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - diff --git a/crates/lightbeam/README.md b/crates/lightbeam/README.md deleted file mode 100644 index 85d787481a..0000000000 --- a/crates/lightbeam/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# Lightbeam - -Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended for use in [Wasmtime][wasmtime]. 
- -[wasmtime]: https://github.com/bytecodealliance/wasmtime - -## Quality of output - -Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. Here's a very simple example, this recursive fibonacci function in Rust: - -```rust -fn fib(n: i32) -> i32 { - if n == 0 || n == 1 { - 1 - } else { - fib(n - 1) + fib(n - 2) - } -} -``` - -When compiled with optimisations enabled, rustc will produce the following WebAssembly: - -```rust -(module - (func $fib (param $p0 i32) (result i32) - (local $l1 i32) - (set_local $l1 - (i32.const 1)) - (block $B0 - (br_if $B0 - (i32.lt_u - (get_local $p0) - (i32.const 2))) - (set_local $l1 - (i32.const 1)) - (loop $L1 - (set_local $l1 - (i32.add - (call $fib - (i32.add - (get_local $p0) - (i32.const -1))) - (get_local $l1))) - (br_if $L1 - (i32.gt_u - (tee_local $p0 - (i32.add - (get_local $p0) - (i32.const -2))) - (i32.const 1))))) - (get_local $l1))) -``` - -Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat): - -```asm -fib: - sub rsp, 0x18 - cmp qword ptr [r14 + 0x28], rsp - jae stack_overflow - mov dword ptr [rsp + 0xc], edi - cmp edi, 2 - jae .Lelse - mov eax, 1 - mov dword ptr [rsp + 8], eax - jmp .Lreturn -.Lelse: - mov dword ptr [rsp + 0xc], edi - mov eax, 1 - mov dword ptr [rsp + 8], eax -.Lloop: - mov edi, dword ptr [rsp + 0xc] - add edi, -1 - call 0 - mov ecx, dword ptr [rsp + 8] - add ecx, eax - mov dword ptr [rsp + 8], ecx - mov ecx, dword ptr [rsp + 0xc] - add ecx, -2 - mov dword ptr [rsp + 0xc], ecx - cmp ecx, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 8] - nop - add rsp, 0x18 - ret -``` - -Cranelift with optimisations enabled produces similar: - -```asm -fib: - push rbp - mov rbp, rsp - sub rsp, 0x20 - mov qword ptr [rsp + 0x10], rdi - mov dword ptr [rsp + 0x1c], esi - mov eax, 1 - mov 
dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 2 - jb .Lreturn - movabs rax, 0 - mov qword ptr [rsp + 8], rax -.Lloop: - mov eax, dword ptr [rsp + 0x1c] - add eax, -1 - mov rcx, qword ptr [rsp + 8] - mov rdx, qword ptr [rsp + 0x10] - mov rdi, rdx - mov esi, eax - call rcx - mov ecx, dword ptr [rsp + 0x18] - add eax, ecx - mov dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - add eax, -2 - mov dword ptr [rsp + 0x1c], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 0x18] - add rsp, 0x20 - pop rbp - ret -``` - -Whereas Lightbeam produces smaller code with far fewer memory accesses than both (and fewer blocks than Firefox's output): - -```asm -fib: - cmp esi, 2 - mov eax, 1 - jb .Lreturn - mov eax, 1 -.Lloop: - mov rcx, rsi - add ecx, 0xffffffff - push rsi - push rax - push rax - mov rsi, rcx - call fib - add eax, [rsp + 8] - mov rcx, [rsp + 0x10] - add ecx, 0xfffffffe - cmp ecx, 1 - mov rsi, rcx - lea rsp, [rsp + 0x18] - ja .Lloop -.Lreturn: - ret -``` - -Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended usecases for real-time systems that want good runtime performance but cannot tolerate compiler bombs. - -## Specification compliance - -Lightbeam passes 100% of the specification test suite, but that doesn't necessarily mean that it's 100% specification-compliant. Hopefully as we run a fuzzer against it we can find any issues and get Lightbeam to a state where it can be used in production. 
- -## Getting involved - -You can file issues in the [Wasmtime issue tracker][Wasmtime issue tracker]. If you want to get involved jump into the [Bytecode Alliance Zulip][bytecodealliance-zulip] and someone can direct you to the right place. I wish I could say "the most useful thing you can do is play with it and open issues where you find problems" but until it passes the spec suite that won't be very helpful. - -[bytecodealliance-zulip]: https://bytecodealliance.zulipchat.com/ -[Wasmtime issue tracker]: https://github.com/bytecodealliance/wasmtime/issues diff --git a/crates/lightbeam/examples/test.rs b/crates/lightbeam/examples/test.rs deleted file mode 100644 index 12d54193ae..0000000000 --- a/crates/lightbeam/examples/test.rs +++ /dev/null @@ -1,16 +0,0 @@ -use lightbeam::translate; - -const WAT: &str = r#" -(module - (func (param i32) (param i32) (result i32) (i32.add (get_local 0) (get_local 1))) -) -"#; - -fn main() -> anyhow::Result<()> { - let data = wat::parse_str(WAT)?; - let translated = translate(&data)?; - let result: u32 = translated.execute_func(0, (5u32, 3u32))?; - println!("f(5, 3) = {}", result); - - Ok(()) -} diff --git a/crates/lightbeam/src/backend.rs b/crates/lightbeam/src/backend.rs deleted file mode 100644 index 240364320b..0000000000 --- a/crates/lightbeam/src/backend.rs +++ /dev/null @@ -1,6134 +0,0 @@ -#![allow(clippy::float_cmp)] - -use self::registers::*; -use crate::error::Error; -use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64}; -use crate::module::ModuleContext; -use cranelift_codegen::{ - binemit, - ir::{self, SourceLoc, TrapCode}, -}; -use dynasm::dynasm; -use dynasmrt::x64::Assembler; -use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; -use std::{ - cmp::Ordering, - convert::{TryFrom, TryInto}, - fmt::Display, - hash::Hash, - iter, mem, - ops::{Deref, RangeInclusive}, -}; -// use wasmtime_environ::BuiltinFunctionIndex; - -mod magic { - /// An 
index type for builtin functions. - pub struct BuiltinFunctionIndex(u32); - - impl BuiltinFunctionIndex { - /// Returns an index for wasm's `memory.grow` builtin function. - pub const fn get_memory32_grow_index() -> Self { - Self(0) - } - /// Returns an index for wasm's imported `memory.grow` builtin function. - pub const fn get_imported_memory32_grow_index() -> Self { - Self(1) - } - /// Returns an index for wasm's `memory.size` builtin function. - pub const fn get_memory32_size_index() -> Self { - Self(2) - } - /// Returns an index for wasm's imported `memory.size` builtin function. - pub const fn get_imported_memory32_size_index() -> Self { - Self(3) - } - - /// Return the index as an u32 number. - pub const fn index(&self) -> u32 { - self.0 - } - } -} - -use magic::BuiltinFunctionIndex; - -/// Size of a pointer on the target in bytes. -const WORD_SIZE: u32 = 8; - -type RegId = u8; - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPR { - Rq(RegId), - Rx(RegId), -} - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPRType { - Rq, - Rx, -} - -impl From for GPRType { - fn from(other: SignlessType) -> GPRType { - match other { - I32 | I64 => GPRType::Rq, - F32 | F64 => GPRType::Rx, - } - } -} - -impl From for Option { - fn from(other: SignlessType) -> Self { - Some(other.into()) - } -} - -impl GPR { - fn type_(self) -> GPRType { - match self { - GPR::Rq(_) => GPRType::Rq, - GPR::Rx(_) => GPRType::Rx, - } - } - - fn rq(self) -> Option { - match self { - GPR::Rq(r) => Some(r), - GPR::Rx(_) => None, - } - } - - fn rx(self) -> Option { - match self { - GPR::Rx(r) => Some(r), - GPR::Rq(_) => None, - } - } -} - -fn arg_locs>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter(); - let mut stack_idx = 0; - - 
types - .into_iter() - .map(move |ty| { - match ty { - I32 | I64 => int_gpr_iter.next(), - F32 | F64 => float_gpr_iter.next(), - } - .map(|&r| CCLoc::Reg(r)) - .unwrap_or_else(|| { - let out = CCLoc::Stack(stack_idx); - stack_idx += 1; - out - }) - }) - // Since we only advance the iterators based on the values in `types`, - // we can't do this lazily. - .collect::>() - .into_iter() -} - -fn arg_locs_skip_caller_vmctx>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - #[derive(Debug, Clone)] - struct WithInt { - caller_vmctx_ty: Option, - iter: I, - } - - impl Iterator for WithInt - where - I: Iterator, - { - type Item = SignlessType; - - fn next(&mut self) -> Option { - self.caller_vmctx_ty.take().or_else(|| self.iter.next()) - } - - fn size_hint(&self) -> (usize, Option) { - let ty_len = if self.caller_vmctx_ty.is_some() { 1 } else { 0 }; - let (lower, upper) = self.iter.size_hint(); - - (lower + ty_len, upper.map(|u| u + ty_len)) - } - } - - impl DoubleEndedIterator for WithInt - where - I: DoubleEndedIterator, - { - fn next_back(&mut self) -> Option { - self.iter - .next_back() - .or_else(|| self.caller_vmctx_ty.take()) - } - } - - impl ExactSizeIterator for WithInt where I: ExactSizeIterator {} - - arg_locs(WithInt { - caller_vmctx_ty: Some(I32), - iter: types.into_iter(), - }) - .skip(1) -} - -pub fn ret_locs(types: impl IntoIterator) -> Result, Error> { - let types = types.into_iter(); - let mut out = Vec::with_capacity(types.size_hint().0); - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter(); - - for ty in types { - match ty { - I32 | I64 => match int_gpr_iter.next() { - None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - F32 | F64 => match float_gpr_iter.next() { - 
None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - } - } - - Ok(out) -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -struct GPRs { - bits: u16, -} - -impl GPRs { - fn new() -> Self { - Self { bits: 0 } - } -} - -#[allow(dead_code)] -pub mod registers { - use super::{RegId, GPR}; - - pub mod rq { - use super::RegId; - - pub const RAX: RegId = 0; - pub const RCX: RegId = 1; - pub const RDX: RegId = 2; - pub const RBX: RegId = 3; - pub const RSP: RegId = 4; - pub const RBP: RegId = 5; - pub const RSI: RegId = 6; - pub const RDI: RegId = 7; - pub const R8: RegId = 8; - pub const R9: RegId = 9; - pub const R10: RegId = 10; - pub const R11: RegId = 11; - pub const R12: RegId = 12; - pub const R13: RegId = 13; - pub const R14: RegId = 14; - pub const R15: RegId = 15; - } - - pub const RAX: GPR = GPR::Rq(self::rq::RAX); - pub const RCX: GPR = GPR::Rq(self::rq::RCX); - pub const RDX: GPR = GPR::Rq(self::rq::RDX); - pub const RBX: GPR = GPR::Rq(self::rq::RBX); - pub const RSP: GPR = GPR::Rq(self::rq::RSP); - pub const RBP: GPR = GPR::Rq(self::rq::RBP); - pub const RSI: GPR = GPR::Rq(self::rq::RSI); - pub const RDI: GPR = GPR::Rq(self::rq::RDI); - pub const R8: GPR = GPR::Rq(self::rq::R8); - pub const R9: GPR = GPR::Rq(self::rq::R9); - pub const R10: GPR = GPR::Rq(self::rq::R10); - pub const R11: GPR = GPR::Rq(self::rq::R11); - pub const R12: GPR = GPR::Rq(self::rq::R12); - pub const R13: GPR = GPR::Rq(self::rq::R13); - pub const R14: GPR = GPR::Rq(self::rq::R14); - pub const R15: GPR = GPR::Rq(self::rq::R15); - - pub const XMM0: GPR = GPR::Rx(0); - pub const XMM1: GPR = GPR::Rx(1); - pub const XMM2: GPR = GPR::Rx(2); - pub const XMM3: GPR = GPR::Rx(3); - pub const XMM4: GPR = GPR::Rx(4); - pub const XMM5: GPR = GPR::Rx(5); - pub const XMM6: GPR = GPR::Rx(6); - pub const XMM7: GPR = GPR::Rx(7); - pub const XMM8: GPR = GPR::Rx(8); - pub const XMM9: GPR = GPR::Rx(9); - pub 
const XMM10: GPR = GPR::Rx(10); - pub const XMM11: GPR = GPR::Rx(11); - pub const XMM12: GPR = GPR::Rx(12); - pub const XMM13: GPR = GPR::Rx(13); - pub const XMM14: GPR = GPR::Rx(14); - pub const XMM15: GPR = GPR::Rx(15); - - pub const NUM_GPRS: u8 = 16; -} - -const SIGN_MASK_F64: u64 = 0x8000_0000_0000_0000; -const REST_MASK_F64: u64 = !SIGN_MASK_F64; -const SIGN_MASK_F32: u32 = 0x8000_0000; -const REST_MASK_F32: u32 = !SIGN_MASK_F32; - -impl GPRs { - fn take(&mut self) -> Option { - let lz = self.bits.trailing_zeros(); - if lz < 16 { - let gpr = lz as RegId; - self.mark_used(gpr); - Some(gpr) - } else { - None - } - } - - fn mark_used(&mut self, gpr: RegId) { - self.bits &= !(1 << gpr as u16); - } - - fn release(&mut self, gpr: RegId) { - debug_assert!( - !self.is_free(gpr), - "released register {} was already free", - gpr - ); - self.bits |= 1 << gpr; - } - - fn is_free(self, gpr: RegId) -> bool { - (self.bits & (1 << gpr)) != 0 - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Registers { - /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example) - scratch_64: (GPRs, [u8; NUM_GPRS as usize]), - /// Registers at 128 bits (xmm0, for example) - scratch_128: (GPRs, [u8; NUM_GPRS as usize]), -} - -impl Default for Registers { - fn default() -> Self { - Self::new() - } -} - -impl Registers { - pub fn new() -> Self { - Self { - scratch_64: (GPRs::new(), [1; NUM_GPRS as _]), - scratch_128: (GPRs::new(), [1; NUM_GPRS as _]), - } - } - - pub fn release_scratch_register(&mut self) -> Result<(), Error> { - // Give ourselves a few scratch registers to work with, for now. 
- for &scratch in SCRATCH_REGS { - self.release(scratch)?; - } - Ok(()) - } - - fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &mut self.scratch_64), - GPR::Rx(r) => (r, &mut self.scratch_128), - } - } - - fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &self.scratch_64), - GPR::Rx(r) => (r, &self.scratch_128), - } - } - - pub fn mark_used(&mut self, gpr: GPR) { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - scratch_counts.0.mark_used(gpr); - scratch_counts.1[gpr as usize] += 1; - } - - pub fn num_usages(&self, gpr: GPR) -> u8 { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.1[gpr as usize] - } - - pub fn take(&mut self, ty: impl Into) -> Option { - let (mk_gpr, scratch_counts) = match ty.into() { - GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64), - GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128), - }; - - let out = scratch_counts.0.take()?; - scratch_counts.1[out as usize] += 1; - Some(mk_gpr(out)) - } - - pub fn release(&mut self, gpr: GPR) -> Result<(), Error> { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - let c = &mut scratch_counts.1[gpr as usize]; - *c = match c.checked_sub(1) { - Some(e) => e, - None => return Err(Error::Microwasm(format!("Double-freed register: {}", gpr))), - }; - if *c == 0 { - scratch_counts.0.release(gpr); - } - Ok(()) - } - - pub fn is_free(&self, gpr: GPR) -> bool { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.0.is_free(gpr) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct BlockCallingConvention> { - pub stack_depth: StackDepth, - pub arguments: I, -} - -impl BlockCallingConvention { - pub fn function_start(arguments: I) -> Self { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. 
- stack_depth: StackDepth(1), - arguments, - } - } -} - -impl BlockCallingConvention -where - for<'a> &'a I::Target: IntoIterator, -{ - pub fn as_ref(&self) -> BlockCallingConvention + '_> { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. - stack_depth: self.stack_depth.clone(), - arguments: self.arguments.into_iter().copied(), - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum FunctionDefLocation { - SameModule, - PossiblyExternal, -} - -// TODO: Combine this with `ValueLocation`? -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CCLoc { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. - Stack(i32), -} - -impl CCLoc { - fn try_from(other: ValueLocation) -> Option { - match other { - ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)), - ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)), - ValueLocation::Cond(_) | ValueLocation::Immediate(_) => None, - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CondCode { - CF0, - CF1, - ZF0, - ZF1, - CF0AndZF0, - CF1OrZF1, - ZF0AndSFEqOF, - ZF1OrSFNeOF, - SFEqOF, - SFNeOF, -} - -mod cc { - use super::CondCode; - - pub const EQUAL: CondCode = CondCode::ZF0; - pub const NOT_EQUAL: CondCode = CondCode::ZF1; - pub const GE_U: CondCode = CondCode::CF0; - pub const LT_U: CondCode = CondCode::CF1; - pub const GT_U: CondCode = CondCode::CF0AndZF0; - pub const LE_U: CondCode = CondCode::CF1OrZF1; - pub const GE_S: CondCode = CondCode::SFEqOF; - pub const LT_S: CondCode = CondCode::SFNeOF; - pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF; - pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF; -} - -impl std::ops::Not for CondCode { - type Output = Self; - - fn not(self) -> Self { - use CondCode::*; - - match self { - CF0 => CF1, - CF1 => CF0, - ZF0 => ZF1, - ZF1 => ZF0, - CF0AndZF0 => CF1OrZF1, - CF1OrZF1 => CF0AndZF0, - ZF0AndSFEqOF => ZF1OrSFNeOF, - 
ZF1OrSFNeOF => ZF0AndSFEqOF, - SFEqOF => SFNeOF, - SFNeOF => SFEqOF, - } - } -} - -/// Describes location of a value. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ValueLocation { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. Note that this offset is from the rsp as it - /// was when we entered the function. - Stack(i32), - /// Value is a literal - Immediate(Value), - /// Value is a set condition code - Cond(CondCode), -} - -impl From for ValueLocation { - fn from(other: CCLoc) -> Self { - match other { - CCLoc::Reg(r) => ValueLocation::Reg(r), - CCLoc::Stack(o) => ValueLocation::Stack(o), - } - } -} - -impl ValueLocation { - fn stack(self) -> Option { - match self { - ValueLocation::Stack(o) => Some(o), - _ => None, - } - } - - fn reg(self) -> Option { - match self { - ValueLocation::Reg(r) => Some(r), - _ => None, - } - } - - fn immediate(self) -> Option { - match self { - ValueLocation::Immediate(i) => Some(i), - _ => None, - } - } - - fn imm_i32(self) -> Option { - self.immediate().and_then(Value::as_i32) - } - - fn imm_i64(self) -> Option { - self.immediate().and_then(Value::as_i64) - } - - fn imm_f32(self) -> Option { - self.immediate().and_then(Value::as_f32) - } - - fn imm_f64(self) -> Option { - self.immediate().and_then(Value::as_f64) - } -} - -// TODO: This assumes only system-v calling convention. -// In system-v calling convention the first 6 arguments are passed via registers. -// All rest arguments are passed on the stack. 
-// Usually system-v uses rdi and rsi, but rdi is used for the vmctx and rsi is used for the _caller_ vmctx -const INTEGER_ARGS_IN_GPRS: &[GPR] = &[GPR::Rq(CALLER_VMCTX), RDX, RCX, R8, R9]; -const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX]; -const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]; -const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1]; -// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI -const SCRATCH_REGS: &[GPR] = &[ - RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, - XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, -]; -const VMCTX: RegId = rq::RDI; -const CALLER_VMCTX: RegId = rq::RSI; - -pub struct CodeGenSession<'module, M> { - assembler: Assembler, - pub module_context: &'module M, - pub op_offset_map: Vec<(AssemblyOffset, Box)>, - func_starts: Vec<(Option, DynamicLabel)>, - pointer_type: SignlessType, -} - -impl<'module, M> CodeGenSession<'module, M> { - pub fn new(func_count: u32, module_context: &'module M, pointer_type: SignlessType) -> Self { - let mut assembler = Assembler::new().unwrap(); - let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label())) - .take(func_count as usize) - .collect::>(); - - CodeGenSession { - assembler, - op_offset_map: Default::default(), - func_starts, - module_context, - pointer_type, - } - } - - pub fn offset(&self) -> usize { - self.assembler.offset().0 - } - - pub fn pointer_type(&self) -> SignlessType { - self.pointer_type - } - - pub fn new_context<'this>( - &'this mut self, - func_idx: u32, - reloc_sink: &'this mut dyn binemit::RelocSink, - ) -> Context<'this, M> { - { - let func_start = &mut self.func_starts[func_idx as usize]; - - // At this point we know the exact start address of this function. Save it - // and define dynamic label at this location. 
- func_start.0 = Some(self.assembler.offset()); - self.assembler.dynamic_label(func_start.1); - } - - Context { - asm: &mut self.assembler, - current_function: func_idx, - reloc_sink, - pointer_type: self.pointer_type, - source_loc: Default::default(), - func_starts: &self.func_starts, - block_state: Default::default(), - module_context: self.module_context, - labels: Default::default(), - } - } - - pub fn into_translated_code_section(self) -> Result { - let exec_buf = self - .assembler - .finalize() - .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?; - let func_starts = self - .func_starts - .iter() - .map(|(offset, _)| offset.unwrap()) - .collect::>(); - Ok(TranslatedCodeSection { - exec_buf, - func_starts, - op_offset_map: self.op_offset_map, - // TODO - relocatable_accesses: vec![], - }) - } -} - -#[derive(Debug)] -struct RelocateAddress { - reg: Option, - imm: usize, -} - -#[derive(Debug)] -struct RelocateAccess { - position: AssemblyOffset, - dst_reg: GPR, - address: RelocateAddress, -} - -pub struct TranslatedCodeSection { - exec_buf: ExecutableBuffer, - func_starts: Vec, - #[allow(dead_code)] - relocatable_accesses: Vec, - op_offset_map: Vec<(AssemblyOffset, Box)>, -} - -impl TranslatedCodeSection { - pub fn func_start(&self, idx: usize) -> *const u8 { - let offset = self.func_starts[idx]; - self.exec_buf.ptr(offset) - } - - pub fn func_range(&self, idx: usize) -> std::ops::Range { - let end = self - .func_starts - .get(idx + 1) - .map(|i| i.0) - .unwrap_or_else(|| self.exec_buf.len()); - - self.func_starts[idx].0..end - } - - pub fn funcs<'a>(&'a self) -> impl Iterator> + 'a { - (0..self.func_starts.len()).map(move |i| self.func_range(i)) - } - - pub fn buffer(&self) -> &[u8] { - &*self.exec_buf - } - - pub fn disassemble(&self) { - crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap(); - } -} - -#[derive(Debug, Default, Clone)] -pub struct BlockState { - pub stack: Stack, - pub depth: StackDepth, - pub regs: 
Registers, -} - -type Stack = Vec; - -mod labels { - use super::Label; - use std::collections::HashMap; - - pub struct LabelInfo { - pub label: Label, - pub align: u32, - pub inner: LabelValue, - } - - #[derive(Copy, Clone, PartialEq, Eq, Hash)] - pub enum LabelValue { - Ret, - I32(i32), - I64(i64), - } - - #[derive(Default)] - pub struct Labels { - map: HashMap, - } - - impl Labels { - pub fn drain(&mut self) -> impl Iterator + '_ { - self.map.drain().map(|(_, info)| info) - } - - pub fn insert( - &mut self, - l: impl FnOnce() -> Label, - align: u32, - label: LabelValue, - ) -> Label { - let val = self.map.entry(label).or_insert_with(move || LabelInfo { - label: l(), - align, - inner: label, - }); - - val.align = val.align.max(align); - - val.label - } - } -} - -use labels::{LabelInfo, LabelValue, Labels}; - -pub struct Context<'this, M> { - pub asm: &'this mut Assembler, - pointer_type: SignlessType, - #[allow(dead_code)] - source_loc: SourceLoc, - reloc_sink: &'this mut dyn binemit::RelocSink, - module_context: &'this M, - current_function: u32, - func_starts: &'this Vec<(Option, DynamicLabel)>, - /// Each push and pop on the value stack increments or decrements this value by 1 respectively. - pub block_state: BlockState, - labels: Labels, -} - -/// Label in code. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Label(DynamicLabel); - -/// Offset from starting value of SP counted in words. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct StackDepth(u32); - -impl StackDepth { - pub fn reserve(&mut self, slots: u32) { - self.0 = self.0.checked_add(slots).unwrap(); - } - - pub fn free(&mut self, slots: u32) { - self.0 = self.0.checked_sub(slots).unwrap(); - } -} - -macro_rules! int_div { - ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => { - // TODO: Fast div using mul for constant divisor? 
It looks like LLVM doesn't do that for us when - // emitting Wasm. - pub fn $div_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when - // emitting Wasm. 
- pub fn $div_s(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$signed_ty>::wrapping_div(dividend, divisor).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - pub fn $rem_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - (dividend as $unsigned_ty % divisor as $unsigned_ty).into(), - ))?; - } - return Ok(()); - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | 
- ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - self.push(rem)?; - Ok(()) - } - - pub fn $rem_s(&mut self) -> Result<(), Error>{ - let mut divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate((dividend % divisor).into()))?; - } - return Ok(()); - } - - let is_neg1 = self.create_label(); - - let current_depth = self.block_state.depth.clone(); - - // TODO: This could cause segfaults because of implicit push/pop - let gen_neg1_case = match divisor { - ValueLocation::Immediate(_) => { - if divisor.$imm_fn().unwrap() == -1 { - self.push(ValueLocation::Immediate((-1 as $signed_ty).into()))?; - self.free_value(dividend)?; - return Ok(()); - } - - false - } - ValueLocation::Reg(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut divisor)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp $reg_ty(reg.rq().unwrap()), -1 - ); - // TODO: We could choose `current_depth` as the depth here instead but we currently - // don't for simplicity - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp $pointer_ty [rsp + offset], -1 - ); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Cond(_) => { - // `cc` can never be `-1`, only `0` and `1` - false - } - }; - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; 
mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - if gen_neg1_case { - let ret = self.create_label(); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; jmp =>ret.0 - ); - self.define_label(is_neg1); - - let dst_ccloc = match CCLoc::try_from(rem) { - None => { - return Err(Error::Microwasm( - "$rem_s Programmer error".to_string(), - )) - } - Some(o) => o, - }; - - self.copy_value( - ValueLocation::Immediate((0 as $signed_ty).into()), - dst_ccloc - )?; - - self.set_stack_depth(current_depth.clone())?; - self.define_label(ret); - } - - self.push(rem)?; - Ok(()) - } - } -} - -macro_rules! unop { - ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - } -} - -macro_rules! 
conversion { - ( - $name:ident, - $instr:ident, - $in_reg_ty:tt, - $in_reg_fn:ident, - $out_reg_ty:tt, - $out_reg_fn:ident, - $in_typ:ty, - $out_typ:ty, - $const_ty_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - $const_fallback(imm.$const_ty_fn().unwrap()).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset] - ); - - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(Type::for_::<$in_typ>(), &mut val)? .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - } -} - -// TODO: Support immediate `count` parameters -macro_rules! 
shift { - ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut count = self.pop()?; - let mut val = self.pop()?; - - if let Some(imm) = count.immediate() { - if let Some(imm) = imm.as_int() { - if let Ok(imm) = i8::try_from(imm) { - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(reg.rq().unwrap()), imm - ); - self.push(ValueLocation::Reg(reg))?; - return Ok(()); - } - } - } - - if val == ValueLocation::Reg(RCX) { - let new = self.take_reg($ty).unwrap(); - self.copy_value(val, CCLoc::Reg(new))?; - self.free_value(val)?; - val = ValueLocation::Reg(new); - } - - // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`. - // Once we've implemented refcounting this will do the right thing - // for free. - let temp_rcx = match count { - ValueLocation::Reg(RCX) => {None} - other => { - let out = if self.block_state.regs.is_free(RCX) { - None - } else { - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new_reg.rq().unwrap()), rcx - ); - Some(new_reg) - }; - - match other { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let gpr = self.put_into_register(I32, &mut count)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; mov cl, Rb(gpr.rq().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; mov cl, [rsp + offset] - ); - } - ValueLocation::Immediate(imm) => { - dynasm!(self.asm - ; mov cl, imm.as_int().unwrap() as i8 - ); - } - } - - out - } - }; - - self.free_value(count)?; - self.block_state.regs.mark_used(RCX); - count = ValueLocation::Reg(RCX); - - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr 
$reg_ty(reg.rq().unwrap()), cl - ); - - self.free_value(count)?; - - if let Some(gpr) = temp_rcx { - dynasm!(self.asm - ; mov rcx, Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } - - self.push(val)?; - Ok(()) - } - } -} - -macro_rules! cmp_i32 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i32() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; cmp DWORD [rsp + offset], i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; cmp Rd(rreg.rq().unwrap()), i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i32().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - } - ValueLocation::Immediate(i) => { - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! 
cmp_i64 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i64() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp QWORD [rsp + offset], i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i64().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I64, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - 
ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), i - ); - } else { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! cmp_f32 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comiss, - f32, - imm_f32, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! eq_float { - ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) { - 1u32 - } else { - 0 - }.into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap()) - ; and Rd(out.rq().unwrap()), 1 - ); - - self.push(ValueLocation::Reg(out))?; - self.free_value(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! 
minmax_float { - ( - $name:ident, - $instr:ident, - $cmpinstr:ident, - $addinstr:ident, - $combineinstr:ident, - $imm_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; je >equal - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal: - ; jnp >equal_but_not_parity - ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal_but_not_parity: - ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; ret: - ); - - self.push(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! cmp_f64 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comisd, - f64, - imm_f64, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! 
cmp_float { - (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{ - let (left, right, this) = ($left, $right, $self); - if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) { - if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) { - ValueLocation::Immediate(1i32.into()) - } else { - ValueLocation::Immediate(0i32.into()) - } - } else { - let lreg = this.put_into_register(GPRType::Rx, left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let result = this.take_reg(I32).unwrap(); - - match right { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset] - ; $instr Rb(result.rq().unwrap()) - ); - } - right => { - let rreg = this.put_into_register(GPRType::Rx, right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; $instr Rb(result.rq().unwrap()) - ); - } - } - - ValueLocation::Reg(result) - } - }}; - ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut left, - &mut right, - $instr, - $const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - - pub fn $reverse_name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut right, - &mut left, - $instr, - 
$const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - }; -} - -macro_rules! binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! 
binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} -macro_rules! commutative_binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!( - $name, - $instr, - $const_fallback, - $reg_ty, - $reg_fn, - $ty, - $imm_fn, - $direct_imm, - |op1: ValueLocation, op0: ValueLocation| match op1 { - ValueLocation::Reg(_) => (op1, op0), - _ => { - if op0.immediate().is_some() { - (op1, op0) - } else { - (op0, op1) - } - } - } - ); - }; -} - -macro_rules! 
binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b)); - }; - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(i1) = left.$imm_fn() { - if let Some(i0) = right.$imm_fn() { - self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into())); - return Ok(()); - } - } - - let (mut left, mut right) = $map_op(left, right); - let lreg = self.put_into_temp_register($ty, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - // This handles the case where we (for example) have a float in an `Rq` reg - let right_reg = self.put_into_register($ty, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.as_int().and_then(|i| i.try_into().ok()) { - $direct_imm(&mut *self, lreg, i); - } else { - let scratch = self.take_reg($ty).unwrap(); - self.immediate_to_reg(scratch, i)?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } - } - } - - self.free_value(right)?; - self.push(left)?; - Ok(()) - } - } -} - -macro_rules! 
load { - (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error> { - fn load_to_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - dst: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - Ok(()) - } - - let base = self.pop()?; - - let temp = self.take_reg($rtype).unwrap(); - - match base { - ValueLocation::Immediate(i) => { - load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap())))?; - } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - load_to_reg(self, temp, (offset as _, Err(gpr)))?; - self.free_value(base)?; - } - } - - self.push(ValueLocation::Reg(temp))?; - Ok(()) - } - }; - ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match runtime_offset { - 
Ok(imm) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; - ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match (dst, runtime_offset) { - (GPR::Rq(r), Ok(imm)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - (GPR::Rx(r), Ok(imm)) => { - if let Some(combined) = offset.checked_add(imm) { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined] - ); - Ok(()) - } else { - let offset_reg = ctx.take_reg(GPRType::Rq).unwrap(); - dynasm!(ctx.asm - ; mov Rq(offset_reg.rq().unwrap()), offset - ; $xmm_instr Rx(r), $ty [ - Rq(mem_ptr_reg.rq().unwrap()) + - Rq(offset_reg.rq().unwrap()) + - imm - ] - ); - ctx.block_state.regs.release(offset_reg)?; - Ok(()) - } - } - (GPR::Rq(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - (GPR::Rx(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; -} - -macro_rules! 
store { - (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error>{ - fn store_from_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - src: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- - } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - ctx.block_state.regs.release(src)?; - Ok(()) - } - - if !(offset <= i32::max_value() as u32) { - return Err(Error::Microwasm(format!("store: offset value too big {}", offset))) - } - - let mut src = self.pop()?; - let base = self.pop()?; - - // `store_from_reg` frees `src` - // TODO: Would it be better to free it outside `store_from_reg`? - let src_reg = self.put_into_register(None, &mut src)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - - match base { - ValueLocation::Immediate(i) => { - store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap())))? 
- } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - store_from_reg(self, src_reg, (offset as i32, Err(gpr)))?; - self.free_value(base)?; - } - } - Ok(()) - } - }; - ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - let src_reg = ctx.put_into_temp_register(GPRType::Rq, &mut ValueLocation::Reg(src))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match runtime_offset { - Ok(imm) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - } - - Ok(src_reg) - }, - $size - ); - }; - ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - match (runtime_offset, src) { - (Ok(imm), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r) - ); - } - (Ok(imm), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r) - ); - } - (Err(offset_reg), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r) - ); - } - (Err(offset_reg), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r) - ); - } - } - - Ok(src) - }, - $size - ); - }; -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct VirtualCallingConvention { - pub stack: Stack, - pub depth: StackDepth, -} - -impl<'this, M: ModuleContext> 
Context<'this, M> { - fn free_reg(&mut self, type_: GPRType) -> Result { - let pos = if let Some(pos) = self - .block_state - .stack - .iter() - .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false)) - { - pos - } else { - return Ok(false); - }; - - let old_loc = self.block_state.stack[pos]; - let new_loc = self.push_physical(old_loc)?; - self.block_state.stack[pos] = new_loc; - - let reg = old_loc.reg().unwrap(); - - for elem in &mut self.block_state.stack[pos + 1..] { - if *elem == old_loc { - *elem = new_loc; - self.block_state.regs.release(reg)?; - } - } - - Ok(true) - } - - fn take_reg(&mut self, r: impl Into) -> Option { - let r = r.into(); - loop { - if let Some(gpr) = self.block_state.regs.take(r) { - break Some(gpr); - } - - if self.free_reg(r) == Ok(false) { - break None; - } - } - } - - pub fn set_source_loc(&mut self, loc: SourceLoc) { - self.source_loc = loc; - } - - pub fn virtual_calling_convention(&self) -> VirtualCallingConvention { - VirtualCallingConvention { - stack: self.block_state.stack.clone(), - depth: self.block_state.depth.clone(), - } - } - - /// Create a new undefined label. 
- pub fn create_label(&mut self) -> Label { - Label(self.asm.new_dynamic_label()) - } - - fn adjusted_offset(&self, offset: i32) -> i32 { - (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32 - } - - cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32)); - cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32) - <= (b as u32)); - cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32)); - cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32) - >= (b as u32)); - cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64)); - cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64) - <= (b as u64)); - cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64)); - cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64) - >= (b as u64)); - cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b); - cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b); - eq_float!( - f32_eq, - cmpeqss, - as_f32, - |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits()) - ); - eq_float!( - f32_ne, - cmpneqss, - as_f32, - |a: Ieee32, b: Ieee32| 
f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits()) - ); - - cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b); - cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b); - eq_float!( - f64_eq, - cmpeqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits()) - ); - eq_float!( - f64_ne, - cmpneqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits()) - ); - - // TODO: Should we do this logic in `eq` and just have this delegate to `eq`? - // That would mean that `eqz` and `eq` with a const 0 argument don't - // result in different code. It would also allow us to generate better - // code for `neq` and `gt_u` with const 0 operand - pub fn i32_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I32(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I32, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn i64_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I64(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I64, &mut val)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - fn br_on_cond_code(&mut self, label: Label, cond: CondCode) { - match cond { - cc::EQUAL => dynasm!(self.asm - ; je =>label.0 - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; jne =>label.0 - ), - cc::GT_U => dynasm!(self.asm - ; ja =>label.0 - ), - cc::GE_U => dynasm!(self.asm - ; jae =>label.0 - ), - cc::LT_U => dynasm!(self.asm - ; jb =>label.0 - ), - cc::LE_U => dynasm!(self.asm - ; jbe =>label.0 - ), - cc::GT_S => dynasm!(self.asm - ; jg =>label.0 - ), - cc::GE_S => dynasm!(self.asm - ; jge =>label.0 - ), - cc::LT_S => dynasm!(self.asm - ; jl =>label.0 - ), - cc::LE_S => dynasm!(self.asm - ; jle =>label.0 - ), - } - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is equal to zero. - pub fn br_if_false( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => !cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF0 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is not equal to zero. 
- pub fn br_if_true( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF1 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Branch unconditionally to the specified label. - pub fn br(&mut self, label: impl Into>) { - match label.into() { - BrTarget::Return => self.ret(), - BrTarget::Label(label) => dynasm!(self.asm - ; jmp =>label.0 - ), - } - } - - /// If `default` is `None` then the default is just continuing execution - pub fn br_table( - &mut self, - targets: I, - default: Option>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> - where - I: IntoIterator>>, - I::IntoIter: ExactSizeIterator + DoubleEndedIterator, - { - let mut targets = targets.into_iter(); - let count = targets.len(); - - let mut selector = self.pop()?; - - pass_args(self)?; - - if let Some(imm) = selector.imm_i32() { - if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) { - match target { - BrTarget::Label(label) => self.br(label), - BrTarget::Return => { - dynasm!(self.asm - ; ret - ); - } - } - } - } else { - let end_label = self.create_label(); - - if count > 0 { - let temp = match self.put_into_temp_register(GPRType::Rq, &mut selector) { - Err(e) => return Err(e), - Ok(o) => match o { - Some(r) => Ok((r, false)), - None => { - self.push_physical(ValueLocation::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - Ok((RAX, true)) - } - }, - }; - - let 
(selector_reg, pop_selector) = match temp { - Err(e) => return Err(e), - Ok(a) => a, - }; - - let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) { - (reg, false) - } else { - let out_reg = if selector_reg == RAX { RCX } else { RAX }; - - self.push_physical(ValueLocation::Reg(out_reg))?; - self.block_state.regs.mark_used(out_reg); - - (out_reg, true) - }; - - self.immediate_to_reg(tmp, (count as u32).into())?; - dynasm!(self.asm - ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; lea Rq(tmp.rq().unwrap()), [>start_label] - ; lea Rq(selector_reg.rq().unwrap()), [ - Rq(selector_reg.rq().unwrap()) * 5 - ] - ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ); - - if pop_tmp { - dynasm!(self.asm - ; pop Rq(tmp.rq().unwrap()) - ); - } else { - self.block_state.regs.release(tmp)?; - } - - if pop_selector { - dynasm!(self.asm - ; pop Rq(selector_reg.rq().unwrap()) - ); - } - - dynasm!(self.asm - ; jmp Rq(selector_reg.rq().unwrap()) - ; start_label: - ); - - for target in targets { - let label = target - .map(|target| self.target_to_label(target)) - .unwrap_or(end_label); - dynasm!(self.asm - ; jmp =>label.0 - ); - } - } - - if let Some(def) = default { - self.br(def); - } - - self.define_label(end_label); - } - - self.free_value(selector)?; - Ok(()) - } - - fn set_stack_depth(&mut self, depth: StackDepth) -> Result<(), Error> { - if self.block_state.depth.0 != depth.0 { - let diff = depth.0 as i32 - self.block_state.depth.0 as i32; - let emit_lea = if diff.abs() != 1 { - true - } else { - match self.block_state.depth.0.cmp(&depth.0) { - Ordering::Less => { - for _ in 0..diff { - dynasm!(self.asm - ; push rax - ); - } - false - } - Ordering::Greater => { - if let Some(trash) = self.take_reg(I64) { - for _ in 0..self.block_state.depth.0 - depth.0 { - dynasm!(self.asm - ; pop Rq(trash.rq().unwrap()) - ); - } - self.block_state.regs.release(trash)?; - false - } else { - true - } - 
} - Ordering::Equal => false, - } - }; - if emit_lea { - dynasm!(self.asm - ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32] - ); - } - self.block_state.depth = depth; - } - Ok(()) - } - - fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - let args = &cc.arguments; - for &dst in args.iter().rev().take(self.block_state.stack.len()) { - if let CCLoc::Reg(r) = dst { - if !self.block_state.regs.is_free(r) - && *self.block_state.stack.last().unwrap() != ValueLocation::Reg(r) - { - // TODO: This would be made simpler and more efficient with a proper SSE - // representation. - self.save_regs(std::iter::once(r))?; - } - - self.block_state.regs.mark_used(r); - } - self.pop_into(dst)?; - } - Ok(()) - } - - pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - self.do_pass_block_args(cc)?; - self.set_stack_depth(cc.stack_depth.clone())?; - Ok(()) - } - - pub fn serialize_block_args( - &mut self, - cc: &BlockCallingConvention, - params: u32, - ) -> Result { - self.do_pass_block_args(cc)?; - - let mut out_args = cc.arguments.clone(); - - out_args.reverse(); - - while out_args.len() < params as usize { - let mut val = self.pop()?; - - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let ccloc = self.put_into_temp_location(None, &mut val)?; - out_args.push(ccloc); - } - - out_args.reverse(); - - self.set_stack_depth(cc.stack_depth.clone())?; - - Ok(BlockCallingConvention { - stack_depth: cc.stack_depth.clone(), - arguments: out_args, - }) - } - - /// Puts all stack values into "real" locations so that they can i.e. 
be set to different - /// values on different iterations of a loop - pub fn serialize_args(&mut self, count: u32) -> Result { - let mut out = Vec::with_capacity(count as _); - - // TODO: We can make this more efficient now that `pop` isn't so complicated - for _ in 0..count { - let mut val = self.pop()?; - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let loc = self.put_into_temp_location(None, &mut val)?; - - out.push(loc); - } - - out.reverse(); - - Ok(BlockCallingConvention { - stack_depth: self.block_state.depth.clone(), - arguments: out, - }) - } - - pub fn get_global(&mut self, global_idx: u32) -> Result<(), Error> { - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let out = self.take_reg(GPRType::Rq).unwrap(); - let vmctx = GPR::Rq(VMCTX); - - // TODO: Are globals necessarily aligned to 128 bits? 
We can load directly to an XMM reg if so - dynasm!(self.asm - ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn set_global(&mut self, global_idx: u32) -> Result<(), Error> { - let mut val = self.pop()?; - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let val_reg = self - .put_into_register(GPRType::Rq, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let vmctx = GPR::Rq(VMCTX); - - // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits - dynasm!(self.asm - ; mov [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32 - ], Rq(val_reg.rq().unwrap()) - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.free_value(val)?; - Ok(()) - } - - fn immediate_to_reg(&mut self, reg: GPR, val: Value) -> Result<(), Error> { - match reg { - GPR::Rq(r) => { - let val = val.as_bytes(); - if (val as u64) <= u32::max_value() as u64 { - dynasm!(self.asm - ; mov Rd(r), val as i32 - ); - } else { - dynasm!(self.asm - ; mov Rq(r), QWORD val - ); - } - } - reg @ GPR::Rx(_) => { - let tmp = self - .take_reg(GPRType::Rq) - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - self.immediate_to_reg(tmp, val)?; - let tmp = ValueLocation::Reg(tmp); - self.copy_value(tmp, CCLoc::Reg(reg))?; - self.free_value(tmp)?; - } - } - - Ok(()) - } - - // The `&` and `&mut` aren't necessary 
(`ValueLocation` is copy) but it ensures that we don't get - // the arguments the wrong way around. In the future we want to have a `ReadLocation` and `WriteLocation` - // so we statically can't write to a literal so this will become a non-issue. - fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) -> Result<(), Error> { - match (src, dst) { - (ValueLocation::Cond(cond), CCLoc::Stack(o)) => { - let offset = self.adjusted_offset(o); - - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete [rsp + offset] - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne [rsp + offset] - ), - cc::GT_U => dynasm!(self.asm - ; seta [rsp + offset] - ), - cc::GE_U => dynasm!(self.asm - ; setae [rsp + offset] - ), - cc::LT_U => dynasm!(self.asm - ; setb [rsp + offset] - ), - cc::LE_U => dynasm!(self.asm - ; setbe [rsp + offset] - ), - cc::GT_S => dynasm!(self.asm - ; setg [rsp + offset] - ), - cc::GE_S => dynasm!(self.asm - ; setge [rsp + offset] - ), - cc::LT_S => dynasm!(self.asm - ; setl [rsp + offset] - ), - cc::LE_S => dynasm!(self.asm - ; setle [rsp + offset] - ), - } - } - (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg { - GPR::Rq(r) => { - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete Rb(r) - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne Rb(r) - ), - cc::GT_U => dynasm!(self.asm - ; seta Rb(r) - ), - cc::GE_U => dynasm!(self.asm - ; setae Rb(r) - ), - cc::LT_U => dynasm!(self.asm - ; setb Rb(r) - ), - cc::LE_U => dynasm!(self.asm - ; setbe Rb(r) - ), - cc::GT_S => dynasm!(self.asm - ; setg Rb(r) - ), - cc::GE_S => dynasm!(self.asm - ; setge Rb(r) - ), - cc::LT_S => dynasm!(self.asm - ; setl Rb(r) - ), - cc::LE_S => dynasm!(self.asm - ; setle Rb(r) - ), - } - } - GPR::Rx(_) => { - let temp = CCLoc::Reg(self.take_reg(I32).unwrap()); - self.copy_value(src, temp)?; - let temp = temp.into(); - self.copy_value(temp, dst)?; - 
self.free_value(temp)?; - } - }, - (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => { - let in_offset = self.adjusted_offset(in_offset); - let out_offset = self.adjusted_offset(out_offset); - if in_offset != out_offset { - if let Some(gpr) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset] - ; mov [rsp + out_offset], Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, [rsp + in_offset + WORD_SIZE as i32] - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - } - // TODO: XMM registers - (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => { - let out_offset = self.adjusted_offset(out_offset); - match in_reg { - GPR::Rq(in_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov [rsp + out_offset], Rq(in_reg) - ); - } - GPR::Rx(in_reg) => { - // We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq [rsp + out_offset], Rx(in_reg) - ); - } - } - } - (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => { - // TODO: Floats - let i = i.as_bytes(); - let out_offset = self.adjusted_offset(out_offset); - if let Some(scratch) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(scratch.rq().unwrap()), QWORD i - ; mov [rsp + out_offset], Rq(scratch.rq().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, QWORD i - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => { - let in_offset = self.adjusted_offset(in_offset); - match out_reg { - GPR::Rq(out_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov Rq(out_reg), [rsp + in_offset] - ); - } - GPR::Rx(out_reg) => { - 
// We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq Rx(out_reg), [rsp + in_offset] - ); - } - } - } - (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => { - if in_reg != out_reg { - match (in_reg, out_reg) { - (GPR::Rq(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; mov Rq(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; movq Rq(out_reg), Rx(in_reg) - ); - } - (GPR::Rq(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movq Rx(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movapd Rx(out_reg), Rx(in_reg) - ); - } - } - } - } - (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => { - // TODO: Floats - self.immediate_to_reg(out_reg, i)?; - } - } - Ok(()) - } - - /// Define the given label at the current position. - /// - /// Multiple labels can be defined at the same position. However, a label - /// can be defined only once. 
- pub fn define_label(&mut self, label: Label) { - self.asm.dynamic_label(label.0); - } - - pub fn set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error> { - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - for elem in &state.stack { - if let ValueLocation::Reg(r) = elem { - self.block_state.regs.mark_used(*r); - } - } - self.block_state.stack = state.stack; - self.block_state.depth = state.depth; - Ok(()) - } - - pub fn apply_cc( - &mut self, - cc: BlockCallingConvention>, - ) -> Result<(), Error> { - let stack = cc.arguments.into_iter(); - - self.block_state.stack = Vec::with_capacity(stack.size_hint().0); - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - - for elem in stack { - if let CCLoc::Reg(r) = elem { - self.block_state.regs.mark_used(r); - } - - self.block_state.stack.push(elem.into()); - } - - self.block_state.depth = cc.stack_depth; - Ok(()) - } - - load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD); - load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD); - load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD); - - load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE); - load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE); - load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD); - load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD); - - load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE); - load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE); - load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD); - load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD); - load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD); - - store!(store8, Rb, NONE, DWORD); - store!(store16, Rw, NONE, QWORD); - store!(store32, Rd, movd, DWORD); - store!(store64, Rq, movq, QWORD); - - fn push_physical(&mut self, mut value: ValueLocation) 
-> Result { - let out_offset = -(self.block_state.depth.0 as i32 + 1); - match value { - ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => { - if let Some(gpr) = self.put_into_register(GPRType::Rq, &mut value)? { - dynasm!(self.asm - ; push Rq(gpr.rq().unwrap()) - ); - } else { - dynasm!(self.asm - ; push rax - ); - - self.copy_value(value, CCLoc::Stack(out_offset))?; - } - - self.free_value(value)?; - } - ValueLocation::Stack(o) => { - let offset = self.adjusted_offset(o); - dynasm!(self.asm - ; push QWORD [rsp + offset] - ); - } - } - - self.block_state.depth.reserve(1); - - Ok(ValueLocation::Stack(out_offset)) - } - - fn push(&mut self, value: ValueLocation) -> Result<(), Error> { - if let Some(mut top) = self.block_state.stack.pop() { - if let ValueLocation::Cond(_) = top { - match self.put_into_register(I32, &mut top) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - } - - self.block_state.stack.push(top); - } - - self.block_state.stack.push(value); - Ok(()) - } - - fn pop(&mut self) -> Result { - match self.block_state.stack.pop() { - Some(v) => Ok(v), - None => Err(Error::Microwasm( - "Stack is empty - pop impossible".to_string(), - )), - } - } - - pub fn drop(&mut self, range: RangeInclusive) -> Result<(), Error> { - let mut repush = Vec::with_capacity(*range.start() as _); - - for _ in 0..*range.start() { - let v = self.pop()?; - repush.push(v); - } - - for _ in range { - let val = self.pop()?; - self.free_value(val)?; - } - - for v in repush.into_iter().rev() { - self.push(v)?; - } - Ok(()) - } - - fn pop_into(&mut self, dst: CCLoc) -> Result<(), Error> { - let val = self.pop()?; - self.copy_value(val, dst)?; - self.free_value(val)?; - Ok(()) - } - - fn free_value(&mut self, val: ValueLocation) -> Result<(), Error> { - if let ValueLocation::Reg(r) = val { - self.block_state.regs.release(r)?; - } - Ok(()) - } - - /// Puts this value into a 
register so that it can be efficiently read - fn put_into_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - if let Some(out) = self.clone_to_register(ty, *val)? { - self.free_value(*val)?; - *val = ValueLocation::Reg(out); - Ok(Some(out)) - } else { - Ok(None) - } - } - - /// Clones this value into a register so that it can be efficiently read - fn clone_to_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - let ty = ty.into(); - match val { - ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } - val => match self.take_reg(ty.unwrap_or(GPRType::Rq)) { - Some(scratch) => { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } - None => Ok(None), - }, - } - } - - /// Puts this value into a temporary register so that operations - /// on that register don't write to a local. - fn put_into_temp_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - let out = self.clone_to_temp_register(ty, *val)?; - if let Some(o) = out { - self.free_value(*val)?; - *val = ValueLocation::Reg(o); - Ok(Some(o)) - } else { - Ok(None) - } - } - - fn put_into_temp_location( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result { - if let Some(gpr) = self.put_into_temp_register(ty, val)? { - Ok(CCLoc::Reg(gpr)) - } else { - let out = CCLoc::Stack(self.push_physical(*val)?.stack().unwrap()); - *val = out.into(); - Ok(out) - } - } - - /// Clones this value into a temporary register so that operations - /// on that register don't write to a local. 
- - fn clone_to_temp_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - // If we have `None` as the type then it always matches (`.unwrap_or(true)`) - match val { - ValueLocation::Reg(r) => { - let ty = ty.into(); - let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true); - - if self.block_state.regs.num_usages(r) <= 1 && type_matches { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } else if let Some(scratch) = self.take_reg(ty.unwrap_or(GPRType::Rq)) { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } else { - Ok(None) - } - } - val => self.clone_to_register(ty, val), - } - } - - pub fn f32_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - - dynasm!(self.asm - ; xorps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - - dynasm!(self.asm - ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - dynasm!(self.asm - ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - ValueLocation::Reg(reg) - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) { - ValueLocation::Immediate( - Ieee32::from_bits( - (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orps Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) { - ValueLocation::Immediate( - Ieee64::from_bits( - (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orpd Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_extend_u(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = if let ValueLocation::Immediate(imm) = val { - ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into()) - } else { - let new_reg = self.take_reg(I64).unwrap(); - - // TODO: Track set-ness of bits - we can make this a no-op in most cases - // but we have to make this unconditional just in case this value - // came from a truncate. - match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ); - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg))?, - ValueLocation::Immediate(_) => { - return Err(Error::Microwasm( - "i32_extend_u unreachable code".to_string(), - )) - } - } - - ValueLocation::Reg(new_reg) - }; - - self.free_value(val)?; - - self.push(out)?; - Ok(()) - } - - pub fn i32_extend_s(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - self.free_value(val)?; - let new_reg = self.take_reg(I64).unwrap(); - - let out = match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap()) - ); - - ValueLocation::Reg(new_reg) - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg) - ); - - 
ValueLocation::Reg(new_reg) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset] - ); - - ValueLocation::Reg(new_reg) - } - // `CondCode` can only be 0 or 1, so sign-extension is always the same as - // zero-extension - val @ ValueLocation::Cond(_) => { - self.copy_value(val, CCLoc::Reg(new_reg))?; - - ValueLocation::Reg(new_reg) - } - ValueLocation::Immediate(imm) => { - self.block_state.regs.release(new_reg)?; - - ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into()) - } - }; - - self.push(out)?; - Ok(()) - } - - unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones); - conversion!( - f64_from_f32, - cvtss2sd, - Rx, - rx, - Rx, - rx, - f32, - f64, - as_f32, - |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits()) - ); - conversion!( - f32_from_f64, - cvtsd2ss, - Rx, - rx, - Rx, - rx, - f64, - f32, - as_f64, - |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits()) - ); - - pub fn i32_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xCF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I32(0)); - - dynasm!(self.asm - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0x4F00_0000_u32 as i32)); - - dynasm!(self.asm - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC1E0_0000_0020_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jna >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0x41E0_0000_0000_0000_u64 as i64)); - - dynasm!(self.asm - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - conversion!( - f32_convert_from_i32_s, - cvtsi2ss, - Rd, - rq, - Rx, - rx, - i32, - f32, - as_i32, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i32_s, - cvtsi2sd, - Rd, - rq, - Rx, - rx, - i32, - f64, - as_i32, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - conversion!( - f32_convert_from_i64_s, - cvtsi2ss, - Rq, - rq, - Rx, - rx, - i64, - f32, - as_i64, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i64_s, - cvtsi2sd, - Rq, - rq, - Rx, - rx, - i64, - f64, - as_i64, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - - pub fn i64_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xDF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC3E0_0000_0000_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I64).unwrap(); - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F00_0000_i32)); - - dynasm!(self.asm - ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; jae >large - ; jp >trap - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; jmp >cont - ; large: - ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I64).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f64_const = - self.aligned_label(16, LabelValue::I64(0x43E0_0000_0000_0000_i64)); - - dynasm!(self.asm - ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; jnb >large - ; jp >trap - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jl >trap - ; jmp >cont - ; large: - ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jnge >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(F32).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(F64).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_wrap_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_reinterpret_from_f32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i64_reinterpret_from_f64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_reinterpret_from_i32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_reinterpret_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - 
ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); - - // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands - // are in registers. - commutative_binop_i32!(i32_add, add, i32::wrapping_add); - commutative_binop_i32!(i32_and, and, |a, b| a & b); - commutative_binop_i32!(i32_or, or, |a, b| a | b); - commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b); - binop_i32!(i32_sub, sub, i32::wrapping_sub); - - commutative_binop_i64!(i64_add, add, i64::wrapping_add); - commutative_binop_i64!(i64_and, and, |a, b| a & b); - commutative_binop_i64!(i64_or, or, |a, b| a | b); - commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b); - binop_i64!(i64_sub, sub, i64::wrapping_sub); - - commutative_binop_f32!(f32_add, addss, |a, b| a + b); - commutative_binop_f32!(f32_mul, mulss, |a, b| a * b); - minmax_float!( - f32_min, - minss, - ucomiss, - addss, - orps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .min(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f32_max, - maxss, - ucomiss, - addss, - andps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .max(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f32!(f32_sub, subss, |a, b| a - b); - binop_f32!(f32_div, divss, |a, b| a / b); - - pub fn f32_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn 
f32_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - commutative_binop_f64!(f64_add, addsd, |a, b| a + b); - commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b); - minmax_float!( - f64_min, - minsd, - ucomisd, - addsd, - orpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .min(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f64_max, - maxsd, - ucomisd, - addsd, - andpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .max(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f64!(f64_sub, subsd, |a, b| a - b); - binop_f64!(f64_div, divsd, |a, b| a / b); - - pub fn f64_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF64), - iter::once(F64), - iter::once(F64), - 
FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - shift!( - i32_shl, - Rd, - shl, - |a, b| (a as i32).wrapping_shl(b as _), - I32 - ); - shift!( - i32_shr_s, - Rd, - sar, - |a, b| (a as i32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_shr_u, - Rd, - shr, - |a, b| (a as u32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_rotl, - Rd, - rol, - |a, b| (a as u32).rotate_left(b as _), - I32 - ); - shift!( - i32_rotr, - Rd, - ror, - |a, b| (a as u32).rotate_right(b as _), - I32 - ); - - shift!( - i64_shl, - Rq, - shl, - |a, b| (a as i64).wrapping_shl(b as _), - I64 - ); - shift!( - i64_shr_s, - Rq, - sar, - |a, b| (a as i64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_shr_u, - Rq, - shr, - |a, b| (a as u64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_rotl, - Rq, - rol, - |a, b| (a as u64).rotate_left(b as _), - I64 - ); - shift!( - i64_rotr, - Rq, - ror, - |a, b| (a as u64).rotate_right(b as _), - I64 - ); - - // TODO: Do this without emitting `mov` - fn cleanup_gprs(&mut self, gprs: impl Iterator) { - for gpr in gprs { - dynasm!(self.asm - ; pop Rq(gpr.rq().unwrap()) - ); - self.block_state.depth.free(1); - // DON'T MARK IT USED HERE! See comment in `full_div` - } - } - - int_div!( - i32_full_div_s, - i32_full_div_u, - i32_div_u, - i32_div_s, - i32_rem_u, - i32_rem_s, - imm_i32, - i32, - u32, - Rd, - DWORD - ); - int_div!( - i64_full_div_s, - i64_full_div_u, - i64_div_u, - i64_div_s, - i64_rem_u, - i64_rem_s, - imm_i64, - i64, - u64, - Rq, - QWORD - ); - - // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have - // to move `RAX`/`RDX` back afterwards). 
- fn full_div( - &mut self, - mut divisor: ValueLocation, - dividend: ValueLocation, - do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RAX); - self.block_state.regs.mark_used(RDX); - if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) { - let new_reg = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(divisor, CCLoc::Reg(new_reg))?; - self.free_value(divisor)?; - - divisor = ValueLocation::Reg(new_reg); - } - self.block_state.regs.release(RAX)?; - self.block_state.regs.release(RDX)?; - - let saved_rax = if self.block_state.regs.is_free(RAX) { - None - } else { - dynasm!(self.asm - ; push rax - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. - Some(()) - }; - - let saved_rdx = if self.block_state.regs.is_free(RDX) { - None - } else { - dynasm!(self.asm - ; push rdx - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. 
- Some(()) - }; - - let saved = saved_rdx - .map(|_| RDX) - .into_iter() - .chain(saved_rax.map(|_| RAX)); - - self.copy_value(dividend, CCLoc::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - - self.free_value(dividend)?; - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RDX); - - do_div(self, &mut divisor)?; - self.free_value(divisor)?; - - if self.block_state.regs.is_free(RAX) { - return Err(Error::Microwasm("full_div: RAX is not free".to_string())); - } - if self.block_state.regs.is_free(RDX) { - return Err(Error::Microwasm("full_div: RDX is not free".to_string())); - } - - Ok((ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved)) - } - - fn i32_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor edx, edx - ; div DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - dynasm!(this.asm - ; xor edx, edx - ; div Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i32_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cdq - ; idiv DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cdq - ; idiv Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor rdx, rdx - ; div QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - dynasm!(this.asm - ; xor rdx, rdx - ; div Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cqo - ; idiv QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cqo - ; idiv Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - // `i32_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. - pub fn i32_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I32, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let lreg = match self.put_into_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - self.free_value(left)?; - ValueLocation::Reg(new_reg) - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - // `i64_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. 
- pub fn i64_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I64, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Ok(i) = i.try_into() { - let new_reg = self.take_reg(I64).unwrap(); - - let lreg = self - .put_into_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i - ); - - self.free_value(left)?; - - ValueLocation::Reg(new_reg) - } else { - let rreg = self - .put_into_register(I64, &mut right)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let lreg = self - .put_into_temp_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) { - match src { - CCLoc::Reg(reg) => match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_U => { - dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - }, - CCLoc::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_U => { - 
dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - } - } - } - } - - pub fn select(&mut self) -> Result<(), Error> { - let mut cond = self.pop()?; - let mut else_ = self.pop()?; - let mut then = self.pop()?; - - if let ValueLocation::Immediate(i) = cond { - if i.as_i32().unwrap() == 0 { - self.free_value(then)?; - self.push(else_)?; - } else { - self.free_value(else_)?; - self.push(then)?; - } - - return Ok(()); - } - - let cond_code = match cond { - ValueLocation::Cond(cc) => cc, - _ => { - let cond_reg = match self.put_into_register(I32, &mut cond) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - dynasm!(self.asm - ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap()) - ); - self.free_value(cond)?; - - cc::NOT_EQUAL - } - }; - - let else_ = if let ValueLocation::Stack(offset) = else_ { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut else_) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - CCLoc::Reg(gpr) - }; - - let then = if let ValueLocation::Stack(offset) = then { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut then) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - CCLoc::Reg(gpr) - }; - - let out_gpr = match (then, else_) { - (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => { - self.cmov(!cond_code, then_reg, else_); - self.free_value(else_.into())?; - - then_reg - } - (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => { - self.cmov(cond_code, else_reg, then); - self.free_value(then.into())?; - - else_reg - } - (then, else_) => { - let out = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(else_.into(), CCLoc::Reg(out))?; - self.cmov(cond_code, out, then); - - self.free_value(then.into())?; - self.free_value(else_.into())?; - - out - } - }; - - self.push(ValueLocation::Reg(out_gpr))?; - Ok(()) - } - - pub fn pick(&mut self, depth: u32) { - let idx = self.block_state.stack.len() - 1 - depth as usize; - let v = self.block_state.stack[idx]; - if let ValueLocation::Reg(r) = v { - self.block_state.regs.mark_used(r); - } - self.block_state.stack.push(v); - } - - pub fn const_(&mut self, imm: Value) -> Result<(), Error> { - self.push(ValueLocation::Immediate(imm))?; - Ok(()) - } - - fn relocated_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - name: &cranelift_codegen::ir::ExternalName, - args: A, - rets: R, - func_def_loc: FunctionDefLocation, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(args); - - let saved_vmctx = if func_def_loc == FunctionDefLocation::PossiblyExternal { - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - Some(self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?) 
- } else { - None - }; - - self.save_volatile()?; - - self.pass_outgoing_args(&locs)?; - - // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate - self.reloc_sink.reloc_external( - (self.asm.offset().0 - - self.func_starts[self.current_function as usize] - .0 - .unwrap() - .0) as u32 - + 2, - // Passing a default location here, since until proven otherwise, it's not used. - ir::SourceLoc::default(), - binemit::Reloc::Abs8, - name, - 0, - ); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), QWORD 0xDEAD_BEEF_DEAD_BEEF_u64 as i64 - ; call Rq(temp.rq().unwrap()) - ); - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(rets)?; - - if func_def_loc == FunctionDefLocation::PossiblyExternal { - let saved_vmctx = saved_vmctx.unwrap(); - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - } - - Ok(()) - } - - fn builtin_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - i: BuiltinFunctionIndex, - args: A, - rets: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs(args); - - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - self.save_volatile()?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.pass_outgoing_args(&locs)?; - - let temp = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_builtin_function(i.index()) as i32 - ] - ; call Rq(temp.rq().unwrap()) - ); - - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - 
- self.push_function_returns(rets)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_size(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_grow(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Use `ArrayVec`? - // TODO: This inefficiently duplicates registers but it's not really possible - // to double up stack space right now. - /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used. 
- fn save_volatile(&mut self) -> Result<(), Error> { - self.save_regs(SCRATCH_REGS.iter().copied())?; - Ok(()) - } - - fn save_regs(&mut self, to_save: I) -> Result<(), Error> - where - I: IntoIterator, - I::IntoIter: Clone, - { - // TODO: We can filter out registers that are already marked free, but just to ensure - // that this doesn't fail when confronted with the `memory_grow`/`memory_size` - // weirdness. - let to_save = to_save.into_iter(); - if to_save.clone().count() == 0 { - return Ok(()); - } - - let mut stack = mem::replace(&mut self.block_state.stack, vec![]); - let mut slice = &mut stack[..]; - - while let Some((first, rest)) = slice.split_first_mut() { - if let ValueLocation::Reg(vreg) = *first { - if to_save.clone().any(|r| r == vreg) { - let old = *first; - *first = self.push_physical(old)?; - for val in &mut *rest { - if *val == old { - self.free_value(*val)?; - *val = *first; - } - } - } - } - - slice = rest; - } - - self.block_state.stack = stack; - - Ok(()) - } - - /// Write the arguments to the callee to the registers and the stack using the SystemV - /// calling convention. 
- fn pass_outgoing_args( - &mut self, - out_locs: &(impl ExactSizeIterator + DoubleEndedIterator + Clone), - ) -> Result<(), Error> { - let total_stack_space = out_locs - .clone() - .flat_map(|l| { - if let CCLoc::Stack(offset) = l { - if offset >= 0 { - Some(offset as u32 + 1) - } else { - None - } - } else { - None - } - }) - .max() - .unwrap_or(0); - let original_depth = self.block_state.depth.clone(); - let mut needed_depth = original_depth.clone(); - needed_depth.reserve(total_stack_space); - - if needed_depth.0 & 1 != 0 { - needed_depth.reserve(1); - } - - self.set_stack_depth(needed_depth.clone())?; - - let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len()); - - for loc in out_locs.clone().rev() { - let val = self.pop()?; - - pending.push((val, loc)); - } - - while !pending.is_empty() { - let start_len = pending.len(); - - for (src, dst) in mem::replace(&mut pending, vec![]) { - if src != ValueLocation::from(dst) { - let dst = match dst { - CCLoc::Reg(r) => { - if !self.block_state.regs.is_free(r) { - pending.push((src, dst)); - continue; - } - - self.block_state.regs.mark_used(r); - - dst - } - CCLoc::Stack(offset) => CCLoc::Stack(offset - needed_depth.0 as i32), - }; - - self.copy_value(src, dst)?; - self.free_value(src)?; - } - } - - if pending.len() == start_len { - let src = match pending - .iter() - .filter_map(|(src, _)| { - if let ValueLocation::Reg(reg) = src { - Some(reg) - } else { - None - } - }) - .next() - { - None => { - return Err(Error::Microwasm( - "Programmer error: We shouldn't need to push \ - intermediate args if we don't have any argument sources in registers" - .to_string(), - )); - } - Some(val) => *val, - }; - let new_src = self.push_physical(ValueLocation::Reg(src))?; - for (old_src, _) in pending.iter_mut() { - if *old_src == ValueLocation::Reg(src) { - *old_src = new_src; - } - } - } - } - - // We do this a second time just in case we had to use `push_physical` to resolve cycles in - // `pending` - 
self.set_stack_depth(needed_depth)?; - - Ok(()) - } - - fn push_function_returns( - &mut self, - returns: impl IntoIterator, - ) -> Result<(), Error> { - for loc in ret_locs(returns)? { - if let CCLoc::Reg(reg) = loc { - self.block_state.regs.mark_used(reg); - } - - self.push(loc.into())?; - } - Ok(()) - } - - fn trap_if(&mut self, ccode: CondCode, trap_code: TrapCode) { - let label = self.create_label(); - self.br_on_cond_code(label, !ccode); - self.trap(trap_code); - self.define_label(label); - } - - pub fn call_indirect< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - type_id: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - let locs = arg_locs_skip_caller_vmctx(arg_types); - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.mark_used(r); - } - } - - let mut callee = self.pop()?; - let callee_reg = self - .put_into_temp_register(I32, &mut callee)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - self.save_volatile()?; - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.release(r)?; - } - } - - self.pass_outgoing_args(&locs)?; - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - - let table_index = 0; - let reg_offset = self - .module_context - .defined_table_index(table_index) - .map(|index| { - ( - None, - self.module_context.vmctx_vmtable_definition(index) as i32, - ) - }); - - let vmctx = GPR::Rq(VMCTX); - let (reg, offset) = reg_offset.unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let temp0 = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; cmp Rd(callee_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_current_elements() as i32 - ] - ;; self.trap_if(cc::GE_U, TrapCode::TableOutOfBounds) - ; imul - Rd(callee_reg.rq().unwrap()), - Rd(callee_reg.rq().unwrap()), - self.module_context.size_of_vmcaller_checked_anyfunc() as i32 - ; mov Rq(temp0.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_base() as i32 - ] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - let temp1 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rd(temp1.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context - .vmctx_vmshared_signature_id(type_id) as i32 - ] - ; cmp DWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_type_index() as i32 - ], Rd(temp1.rq().unwrap()) - ;; self.trap_if(cc::NOT_EQUAL, TrapCode::BadSignature) - ; mov Rq(VMCTX), [ - Rq(temp0.rq().unwrap()) + - 
Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_vmctx() as i32 - ] - ; call QWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32 - ] - ); - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.block_state.regs.release(temp0)?; - self.block_state.regs.release(temp1)?; - self.free_value(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - pub fn swap(&mut self, depth: u32) { - let last = self.block_state.stack.len() - 1; - self.block_state.stack.swap(last, last - depth as usize); - } - - /// Call a function with the given index - pub fn call_direct, R: IntoIterator>( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.relocated_function_call( - &ir::ExternalName::user(0, index), - arg_types, - return_types, - FunctionDefLocation::SameModule, - )?; - Ok(()) - } - - /// Recursively call the same function again - pub fn call_direct_self< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - defined_index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - self.save_volatile()?; - - let (_, label) = self.func_starts[defined_index as usize]; - - self.pass_outgoing_args(&locs)?; - dynasm!(self.asm - ; call =>label - ); - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - Ok(()) - } - - /// Call a function with the given index - pub 
fn call_direct_imported< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - let saved_vmctx = self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?; - - self.save_volatile()?; - self.pass_outgoing_args(&locs)?; - - let callee = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(callee.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32 - ] - ; mov Rq(VMCTX), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32 - ] - ; call Rq(callee.rq().unwrap()) - ); - - self.block_state.regs.release(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - - Ok(()) - } - - // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them - // as scratch registers - /// Writes the function prologue and stores the arguments as locals - pub fn start_function>( - &mut self, - params: P, - ) -> Result<(), Error> - where - P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.apply_cc(BlockCallingConvention::function_start( - arg_locs_skip_caller_vmctx(params), - ))?; - Ok(()) - } - - pub fn ret(&mut self) { - dynasm!(self.asm - ; ret - ); - } - - pub fn epilogue(&mut self) { - for LabelInfo { - label, - align, - inner, - } in self.labels.drain() - { - match inner { - LabelValue::I32(val) => { - dynasm!(self.asm - ; .align align as usize 
- ;; self.asm.dynamic_label(label.0) - ; .dword val - ); - } - LabelValue::I64(val) => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; .qword val - ); - } - LabelValue::Ret => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; ret - ); - } - } - } - } - - pub fn trap(&mut self, _trap_id: TrapCode) { - // TODO: Emit trap info by writing the trap ID and current source location to a - // `binemit::TrapSink`. - dynasm!(self.asm - ; ud2 - ); - } - - pub fn ret_label(&mut self) -> Label { - #[derive(Copy, Clone, Hash)] - struct RetLabel; - - self.label(LabelValue::Ret) - } - - fn label(&mut self, label: LabelValue) -> Label { - self.aligned_label(1, label) - } - - fn aligned_label(&mut self, align: u32, label: LabelValue) -> Label { - let asm = &mut self.asm; - self.labels - .insert(|| Label(asm.new_dynamic_label()), align, label) - } - - fn target_to_label(&mut self, target: BrTarget