// Patch summary (preamble text placed before the first `diff --git` header).
//
// Purpose: regression fix for unaligned loads into xmm registers when a value is
// automatically converted to an `XmmMem` argument in ISLE (the bundled runtest
// cites https://github.com/bytecodealliance/wasmtime/issues/4761).
//
// Files touched:
//   * cranelift/codegen/src/isa/x64/inst.isle
//       `put_in_xmm_mem` and `put_in_xmm_mem_imm` stop being ISLE rules layered on
//       `put_in_reg_mem` / `put_in_reg_mem_imm` and become extern constructors.
//   * cranelift/codegen/src/isa/x64/lower/isle.rs
//       Rust implementations of the two constructors:
//         - `put_in_xmm_mem`: a constant input is cast to `u128`, emitted through
//           `emit_u128_le_const`, and returned as a `SyntheticAmode::ConstantOffset`
//           memory operand (so the pooled constant is always 16 bytes); otherwise a
//           `UniqueUse` producer accepted by `is_mergeable_load` is sunk and turned
//           into an amode; otherwise the value is placed in a register.
//         - `put_in_xmm_mem_imm`: a constant accepted by `to_simm32` is returned as
//           an immediate; anything else is delegated to `put_in_xmm_mem` and
//           re-wrapped as a `RegMemImm`.
//   * cranelift/codegen/src/machinst/isle.rs and cranelift/codegen/src/prelude.isle
//       New prelude helper `emit_u128_le_const`, which registers the 16
//       little-endian bytes of a `u128` as a `VCodeConstantData::Generated` constant.
//   * cranelift/filetests/filetests/runtests/x64-xmm-mem-align-bug.clif
//       New run test: `fcopysign` of an `f64const` with itself must return the
//       same constant.
//
// NOTE(review): the patch text below appears to have had its original newlines
// collapsed into spaces; the hunk headers still describe multi-line hunks, so the
// line structure presumably has to be restored before this can be applied - confirm
// against the upstream commit.
diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index a2a8e1a862..725e9fd9af 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1309,15 +1309,13 @@ ;; ;; Asserts that the value goes into a XMM. (decl put_in_xmm_mem (Value) XmmMem) -(rule (put_in_xmm_mem val) - (reg_mem_to_xmm_mem (put_in_reg_mem val))) +(extern constructor put_in_xmm_mem put_in_xmm_mem) ;; Put a value into a `XmmMemImm`. ;; ;; Asserts that the value goes into a XMM. (decl put_in_xmm_mem_imm (Value) XmmMemImm) -(rule (put_in_xmm_mem_imm val) - (xmm_mem_imm_new (put_in_reg_mem_imm val))) +(extern constructor put_in_xmm_mem_imm put_in_xmm_mem_imm) ;; Construct an `InstOutput` out of a single GPR register. (decl output_gpr (Gpr) InstOutput) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 15daf707a9..1136ab5206 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -133,6 +133,48 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { RegMemImm::reg(self.put_in_reg(val)) } + fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm { + let inputs = self.lower_ctx.get_value_as_source_or_const(val); + + if let Some(c) = inputs.constant { + if let Some(imm) = to_simm32(c as i64) { + return XmmMemImm::new(imm.to_reg_mem_imm()).unwrap(); + } + } + + let res = match self.put_in_xmm_mem(val).to_reg_mem() { + RegMem::Reg { reg } => RegMemImm::Reg { reg }, + RegMem::Mem { addr } => RegMemImm::Mem { addr }, + }; + + XmmMemImm::new(res).unwrap() + } + + fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem { + let inputs = self.lower_ctx.get_value_as_source_or_const(val); + + if let Some(c) = inputs.constant { + // A load from the constant pool is better than a rematerialization into a register, + // because it reduces register pressure. 
+ // + // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force + // constants to be 16 bytes when a constant will be used in place of an xmm register. + let vcode_constant = self.emit_u128_le_const(c as u128); + return XmmMem::new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant))) + .unwrap(); + } + + if let InputSourceInst::UniqueUse(src_insn, 0) = inputs.inst { + if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) { + self.lower_ctx.sink_inst(src_insn); + let amode = lower_to_amode(self.lower_ctx, addr_input, offset); + return XmmMem::new(RegMem::mem(amode)).unwrap(); + } + } + + XmmMem::new(RegMem::reg(self.put_in_reg(val))).unwrap() + } + fn put_in_reg_mem(&mut self, val: Value) -> RegMem { let inputs = self.lower_ctx.get_value_as_source_or_const(val); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index a6a3a39657..3417319209 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -804,6 +804,12 @@ macro_rules! isle_prelude_methods { self.lower_ctx.use_constant(data) } + #[inline] + fn emit_u128_le_const(&mut self, value: u128) -> VCodeConstant { + let data = VCodeConstantData::Generated(value.to_le_bytes().as_slice().into()); + self.lower_ctx.use_constant(data) + } + #[inline] fn const_to_vconst(&mut self, constant: Constant) -> VCodeConstant { self.lower_ctx.use_constant(VCodeConstantData::Pool( diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 5eb7a6c5dd..799a58f3c1 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -570,6 +570,12 @@ (decl emit_u64_le_const (u64) VCodeConstant) (extern constructor emit_u64_le_const emit_u64_le_const) +;; Add a u128 little-endian constant to the in-memory constant pool and +;; return a VCodeConstant index that refers to it. 
This is +;; side-effecting but idempotent (constants are deduplicated). +(decl emit_u128_le_const (u128) VCodeConstant) +(extern constructor emit_u128_le_const emit_u128_le_const) + ;; Fetch the VCodeConstant associated with a Constant. (decl const_to_vconst (Constant) VCodeConstant) (extern constructor const_to_vconst const_to_vconst) diff --git a/cranelift/filetests/filetests/runtests/x64-xmm-mem-align-bug.clif b/cranelift/filetests/filetests/runtests/x64-xmm-mem-align-bug.clif new file mode 100644 index 0000000000..66adea993d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/x64-xmm-mem-align-bug.clif @@ -0,0 +1,17 @@ +test run +set enable_llvm_abi_extensions +target x86_64 + +; Regression test for unaligned loads to xmm registers when relying on automatic +; conversion to XmmMem arguments in ISLE. +; https://github.com/bytecodealliance/wasmtime/issues/4761 +function %a() -> f64 { + ss0 = explicit_slot 59 + +block0: + v0 = f64const 0x1.d7d7d7d7d006fp984 + v1 = fcopysign v0, v0 + return v1 +} + +; run: %a() == 0x1.d7d7d7d7d006fp984