From cee4b209f346ea279490268fe434dc52d0e0680c Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 22 Aug 2022 13:57:36 -0700 Subject: [PATCH] x64: Lower fcopysign, ceil, floor, nearest, and trunc in ISLE (#4730) https://github.com/bytecodealliance/wasmtime/pull/4730 --- cranelift/codegen/src/isa/x64/inst.isle | 61 +++++++++- cranelift/codegen/src/isa/x64/inst/args.rs | 3 +- cranelift/codegen/src/isa/x64/lower.isle | 94 ++++++++++++++ cranelift/codegen/src/isa/x64/lower.rs | 115 +----------------- cranelift/codegen/src/isa/x64/lower/isle.rs | 32 +++++ .../filetests/isa/x64/ceil-libcall.clif | 33 +++++ .../filetests/filetests/isa/x64/ceil.clif | 59 +++++++++ .../filetests/isa/x64/fcopysign.clif | 43 +++++++ .../filetests/isa/x64/floor-libcall.clif | 33 +++++ .../filetests/filetests/isa/x64/floor.clif | 59 +++++++++ .../filetests/isa/x64/nearest-libcall.clif | 33 +++++ .../filetests/filetests/isa/x64/nearest.clif | 59 +++++++++ .../filetests/isa/x64/trunc-libcall.clif | 33 +++++ .../filetests/filetests/isa/x64/trunc.clif | 59 +++++++++ 14 files changed, 605 insertions(+), 111 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/ceil-libcall.clif create mode 100644 cranelift/filetests/filetests/isa/x64/ceil.clif create mode 100644 cranelift/filetests/filetests/isa/x64/fcopysign.clif create mode 100644 cranelift/filetests/filetests/isa/x64/floor-libcall.clif create mode 100644 cranelift/filetests/filetests/isa/x64/floor.clif create mode 100644 cranelift/filetests/filetests/isa/x64/nearest-libcall.clif create mode 100644 cranelift/filetests/filetests/isa/x64/nearest.clif create mode 100644 cranelift/filetests/filetests/isa/x64/trunc-libcall.clif create mode 100644 cranelift/filetests/filetests/isa/x64/trunc.clif diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index be1b4c6ffd..cabbffc343 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1120,6 +1120,15 @@ (decl encode_fcmp_imm (FcmpImm) u8) (extern constructor encode_fcmp_imm encode_fcmp_imm) +(type RoundImm extern + (enum RoundNearest + RoundDown + RoundUp + RoundZero)) + +(decl encode_round_imm (RoundImm) u8) +(extern constructor encode_round_imm encode_round_imm) + ;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type Gpr (primitive Gpr)) @@ -1394,6 +1403,9 @@ (decl use_fma () Type) (extern extractor use_fma use_fma) +(decl use_sse41 () Type) +(extern extractor use_sse41 use_sse41) + ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; ;; Extract a constant `Imm8Reg.Imm8` from a value operand. @@ -2575,6 +2587,42 @@ lane size)) +;; Helper for creating `roundss` instructions. +(decl x64_roundss (Xmm RoundImm) Xmm) +(rule (x64_roundss src1 round) + (xmm_rm_r_imm (SseOpcode.Roundss) + src1 + src1 + (encode_round_imm round) + (OperandSize.Size32))) + +;; Helper for creating `roundsd` instructions. +(decl x64_roundsd (Xmm RoundImm) Xmm) +(rule (x64_roundsd src1 round) + (xmm_rm_r_imm (SseOpcode.Roundsd) + src1 + src1 + (encode_round_imm round) + (OperandSize.Size32))) + +;; Helper for creating `roundps` instructions. +(decl x64_roundps (Xmm RoundImm) Xmm) +(rule (x64_roundps src1 round) + (xmm_rm_r_imm (SseOpcode.Roundps) + src1 + src1 + (encode_round_imm round) + (OperandSize.Size32))) + +;; Helper for creating `roundpd` instructions. +(decl x64_roundpd (Xmm RoundImm) Xmm) +(rule (x64_roundpd src1 round) + (xmm_rm_r_imm (SseOpcode.Roundpd) + src1 + src1 + (encode_round_imm round) + (OperandSize.Size32))) + ;; Helper for creating `pmaddwd` instructions. (decl x64_pmaddwd (Xmm XmmMem) Xmm) (rule (x64_pmaddwd src1 src2) @@ -3659,7 +3707,18 @@ (type LibCall extern (enum FmaF32 - FmaF64)) + FmaF64 + CeilF32 + CeilF64 + FloorF32 + FloorF64 + NearestF32 + NearestF64 + TruncF32 + TruncF64)) + +(decl libcall_1 (LibCall Reg) Reg) +(extern constructor libcall_1 libcall_1) (decl libcall_3 (LibCall Reg Reg Reg) Reg) (extern constructor libcall_3 libcall_3) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 8baf4061b4..eddfc6cd94 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1770,7 +1770,8 @@ impl From for FcmpImm { /// However the rounding immediate which this field helps make up, also includes /// bits 3 and 4 which define the rounding select and precision mask respectively. /// These two bits are not defined here and are implictly set to zero when encoded. -pub(crate) enum RoundImm { +#[derive(Clone, Copy)] +pub enum RoundImm { RoundNearest = 0x00, RoundDown = 0x01, RoundUp = 0x02, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 4f6e4ba6d8..10cdd7fd86 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3332,3 +3332,97 @@ (rule (lower (has_type $F64 (bitcast src @ (value_type $I64)))) (bitcast_gpr_to_xmm $I64 src)) + +;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b))) + (let ((sign_bit Xmm (imm $F32 0x80000000))) + (x64_orps + (x64_andnps sign_bit a) + (x64_andps sign_bit b)))) + +(rule (lower (has_type $F64 (fcopysign a @ (value_type $F64) b))) + (let ((sign_bit Xmm (imm $F64 0x8000000000000000))) + (x64_orpd + (x64_andnpd sign_bit a) + (x64_andpd sign_bit b)))) + +;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32)))) + (x64_roundss a (RoundImm.RoundUp))) + +(rule (lower (ceil a @ (value_type $F32))) + (libcall_1 (LibCall.CeilF32) a)) + +(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64)))) + (x64_roundsd a (RoundImm.RoundUp))) + +(rule (lower (ceil a @ (value_type $F64))) + (libcall_1 (LibCall.CeilF64) a)) + +(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32X4)))) + (x64_roundps a (RoundImm.RoundUp))) + +(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64X2)))) + (x64_roundpd a (RoundImm.RoundUp))) + +;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32)))) + (x64_roundss a (RoundImm.RoundDown))) + +(rule (lower (floor a @ (value_type $F32))) + (libcall_1 (LibCall.FloorF32) a)) + +(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64)))) + (x64_roundsd a (RoundImm.RoundDown))) + +(rule (lower (floor a @ (value_type $F64))) + (libcall_1 (LibCall.FloorF64) a)) + +(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32X4)))) + (x64_roundps a (RoundImm.RoundDown))) + +(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64X2)))) + (x64_roundpd a (RoundImm.RoundDown))) + +;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32)))) + (x64_roundss a (RoundImm.RoundNearest))) + +(rule (lower (nearest a @ (value_type $F32))) + (libcall_1 (LibCall.NearestF32) a)) + +(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64)))) + (x64_roundsd a (RoundImm.RoundNearest))) + +(rule (lower (nearest a @ (value_type $F64))) + (libcall_1 (LibCall.NearestF64) a)) + +(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32X4)))) + (x64_roundps a (RoundImm.RoundNearest))) + +(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64X2)))) + (x64_roundpd a (RoundImm.RoundNearest))) + +;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32)))) + (x64_roundss a (RoundImm.RoundZero))) + +(rule (lower (trunc a @ (value_type $F32))) + (libcall_1 (LibCall.TruncF32) a)) + +(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64)))) + (x64_roundsd a (RoundImm.RoundZero))) + +(rule (lower (trunc a @ (value_type $F64))) + (libcall_1 (LibCall.TruncF64) a)) + +(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32X4)))) + (x64_roundps a (RoundImm.RoundZero))) + +(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2)))) + (x64_roundpd a (RoundImm.RoundZero))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3a7c53af8d..3fab7062a4 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -569,118 +569,15 @@ fn lower_insn_to_regs( | Opcode::Unarrow | Opcode::Bitcast | Opcode::Fabs - | Opcode::Fneg => { + | Opcode::Fneg + | Opcode::Fcopysign + | Opcode::Ceil + | Opcode::Floor + | Opcode::Nearest + | Opcode::Trunc => { implemented_in_isle(ctx); } - Opcode::Fcopysign => { - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let lhs = put_input_in_reg(ctx, inputs[0]); - let rhs = put_input_in_reg(ctx, inputs[1]); - - let ty = ty.unwrap(); - - // We're going to generate the following sequence: - // - // movabs $INT_MIN, tmp_gpr1 - // mov{d,q} tmp_gpr1, tmp_xmm1 - // movap{s,d} tmp_xmm1, dst - // andnp{s,d} src_1, dst - // movap{s,d} src_2, tmp_xmm2 - // andp{s,d} tmp_xmm1, tmp_xmm2 - // orp{s,d} tmp_xmm2, dst - - let tmp_xmm1 = ctx.alloc_tmp(types::F32).only_reg().unwrap(); - let tmp_xmm2 = ctx.alloc_tmp(types::F32).only_reg().unwrap(); - - let (sign_bit_cst, mov_op, and_not_op, and_op, or_op) = match ty { - types::F32 => ( - 0x8000_0000, - SseOpcode::Movaps, - SseOpcode::Andnps, - SseOpcode::Andps, - SseOpcode::Orps, - ), - types::F64 => ( - 0x8000_0000_0000_0000, - SseOpcode::Movapd, - SseOpcode::Andnpd, - SseOpcode::Andpd, - SseOpcode::Orpd, - ), - _ => { - panic!("unexpected type {:?} for copysign", ty); - } - }; - - for inst in Inst::gen_constant(ValueRegs::one(tmp_xmm1), sign_bit_cst, ty, |ty| { - ctx.alloc_tmp(ty).only_reg().unwrap() - }) { - ctx.emit(inst); - } - ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); - ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst)); - ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2)); - ctx.emit(Inst::xmm_rm_r( - and_op, - RegMem::reg(tmp_xmm1.to_reg()), - tmp_xmm2, - )); - ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst)); - } - - Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => { - let ty = ty.unwrap(); - if isa_flags.use_sse41() { - let mode = match op { - Opcode::Ceil => RoundImm::RoundUp, - Opcode::Floor => RoundImm::RoundDown, - Opcode::Nearest => RoundImm::RoundNearest, - Opcode::Trunc => RoundImm::RoundZero, - _ => panic!("unexpected opcode {:?} in Ceil/Floor/Nearest/Trunc", op), - }; - let op = match ty { - types::F32 => SseOpcode::Roundss, - types::F64 => SseOpcode::Roundsd, - types::F32X4 => SseOpcode::Roundps, - types::F64X2 => SseOpcode::Roundpd, - _ => panic!("unexpected type {:?} in Ceil/Floor/Nearest/Trunc", ty), - }; - let src = input_to_reg_mem(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::xmm_rm_r_imm( - op, - src, - dst, - mode.encode(), - OperandSize::Size32, - )); - } else { - // Lower to VM calls when there's no access to SSE4.1. - // Note, for vector types on platforms that don't support sse41 - // the execution will panic here. - let libcall = match (op, ty) { - (Opcode::Ceil, types::F32) => LibCall::CeilF32, - (Opcode::Ceil, types::F64) => LibCall::CeilF64, - (Opcode::Floor, types::F32) => LibCall::FloorF32, - (Opcode::Floor, types::F64) => LibCall::FloorF64, - (Opcode::Nearest, types::F32) => LibCall::NearestF32, - (Opcode::Nearest, types::F64) => LibCall::NearestF64, - (Opcode::Trunc, types::F32) => LibCall::TruncF32, - (Opcode::Trunc, types::F64) => LibCall::TruncF64, - _ => panic!( - "unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc", - ty, op - ), - }; - - let input = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - emit_vm_call(ctx, flags, triple, libcall, &[input], &[dst])?; - } - } - Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), Opcode::StackAddr => { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 4f2e32090f..4d0dfea4a4 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -176,6 +176,11 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { imm.encode() } + #[inline] + fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 { + imm.encode() + } + #[inline] fn avx512vl_enabled(&mut self, _: Type) -> Option<()> { if self.isa_flags.use_avx512vl_simd() { @@ -248,6 +253,15 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } } + #[inline] + fn use_sse41(&mut self, _: Type) -> Option<()> { + if self.isa_flags.use_sse41() { + Some(()) + } else { + None + } + } + #[inline] fn imm8_from_value(&mut self, val: Value) -> Option { let inst = self.lower_ctx.dfg().value_def(val).inst()?; @@ -715,6 +729,24 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { regs::rsp().to_real_reg().unwrap().into() } + fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg { + let call_conv = self.lower_ctx.abi().call_conv(); + let ret_ty = libcall.signature(call_conv).returns[0].value_type; + let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap(); + + emit_vm_call( + self.lower_ctx, + self.flags, + self.triple, + libcall.clone(), + &[a], + &[output_reg], + ) + .expect("Failed to emit LibCall"); + + output_reg.to_reg() + } + fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg { let call_conv = self.lower_ctx.abi().call_conv(); let ret_ty = libcall.signature(call_conv).returns[0].value_type; diff --git a/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif new file mode 100644 index 0000000000..f24acddcd4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif @@ -0,0 +1,33 @@ +test compile precise-output +target x86_64 has_sse41=false + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %CeilF32+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %CeilF64+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/ceil.clif b/cranelift/filetests/filetests/isa/x64/ceil.clif new file mode 100644 index 0000000000..743f0e70a9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/ceil.clif @@ -0,0 +1,59 @@ +test compile precise-output +target x86_64 has_sse41=true + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundss $2, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundsd $2, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundps $2, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = ceil v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundpd $2, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/fcopysign.clif b/cranelift/filetests/filetests/isa/x64/fcopysign.clif new file mode 100644 index 0000000000..a9d4a43348 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/fcopysign.clif @@ -0,0 +1,43 @@ +test compile precise-output +target x86_64 + +function %f1(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $-2147483648, %r8d +; movd %r8d, %xmm9 +; movdqa %xmm0, %xmm14 +; movdqa %xmm9, %xmm0 +; andnps %xmm0, %xmm14, %xmm0 +; andps %xmm9, %xmm1, %xmm9 +; orps %xmm0, %xmm9, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f1(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movabsq $-9223372036854775808, %r8 +; movq %r8, %xmm9 +; movdqa %xmm0, %xmm14 +; movdqa %xmm9, %xmm0 +; andnpd %xmm0, %xmm14, %xmm0 +; andpd %xmm9, %xmm1, %xmm9 +; orpd %xmm0, %xmm9, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/floor-libcall.clif b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif new file mode 100644 index 0000000000..f965c6687c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif @@ -0,0 +1,33 @@ +test compile precise-output +target x86_64 has_sse41=false + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %FloorF32+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %FloorF64+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/floor.clif b/cranelift/filetests/filetests/isa/x64/floor.clif new file mode 100644 index 0000000000..e27004f6f2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/floor.clif @@ -0,0 +1,59 @@ +test compile precise-output +target x86_64 has_sse41=true + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundss $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundsd $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundps $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = floor v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundpd $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif new file mode 100644 index 0000000000..8de2c110be --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif @@ -0,0 +1,33 @@ +test compile precise-output +target x86_64 has_sse41=false + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %NearestF32+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %NearestF64+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/nearest.clif b/cranelift/filetests/filetests/isa/x64/nearest.clif new file mode 100644 index 0000000000..46ec50cf1f --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/nearest.clif @@ -0,0 +1,59 @@ +test compile precise-output +target x86_64 has_sse41=true + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundss $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundsd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundps $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = nearest v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundpd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif new file mode 100644 index 0000000000..226388a1cb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif @@ -0,0 +1,33 @@ +test compile precise-output +target x86_64 has_sse41=false + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %TruncF32+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %TruncF64+0, %r8 +; call *%r8 +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/trunc.clif b/cranelift/filetests/filetests/isa/x64/trunc.clif new file mode 100644 index 0000000000..fbf9cf0948 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/trunc.clif @@ -0,0 +1,59 @@ +test compile precise-output +target x86_64 has_sse41=true + +function %f1(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundss $3, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundsd $3, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundps $3, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = trunc v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; roundpd $3, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +