diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index e9b0b4ea45..1d259270c9 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2914,15 +2914,15 @@ (decl x64_neg (Type Gpr) Gpr) (rule (x64_neg ty src) (let ((dst WritableGpr (temp_writable_gpr)) - (size OperandSize (operand_size_of_type_32_64 ty)) + (size OperandSize (raw_operand_size_of_type ty)) (_ Unit (emit (MInst.Neg size src dst)))) dst)) - + ;; Helper for creating `neg` instructions whose flags are also used. (decl x64_neg_paired (Type Gpr) ProducesFlags) (rule (x64_neg_paired ty src) (let ((dst WritableGpr (temp_writable_gpr)) - (size OperandSize (operand_size_of_type_32_64 ty)) + (size OperandSize (raw_operand_size_of_type ty)) (inst MInst (MInst.Neg size src dst))) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst dst))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 5ff32deb1e..9221b067e1 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1172,6 +1172,50 @@ (x64_psllq (vector_all_ones) (RegMemImm.Imm 63)))) +;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl lower_bmask (Type Type ValueRegs) ValueRegs) + +;; Values that fit in a register +;; +;; Use the neg instruction on the input which sets the CF (carry) flag +;; to 0 if the input is 0 or 1 otherwise. +;; We then subtract the output register with itself, which always gives a 0, +;; however use the carry flag from the previous negate to generate a -1 if it +;; was nonzero. 
+;; +;; neg in_reg +;; sbb out_reg, out_reg +(rule 0 + (lower_bmask (fits_in_64 out_ty) (fits_in_64 in_ty) val) + (let ((reg Gpr (value_regs_get_gpr val 0)) + (out ValueRegs (with_flags + (x64_neg_paired in_ty reg) + (x64_sbb_paired out_ty reg reg)))) + ;; Extract only the output of the sbb instruction + (value_reg (value_regs_get out 1)))) + + +;; If the input type is I128 we can `or` the registers, and recurse to the general case. +(rule 1 + (lower_bmask (fits_in_64 out_ty) $I128 val) + (let ((lo Gpr (value_regs_get_gpr val 0)) + (hi Gpr (value_regs_get_gpr val 1)) + (mixed Gpr (x64_or $I64 lo hi))) + (lower_bmask out_ty $I64 (value_reg mixed)))) + +;; If the output type is I128 we just duplicate the result of the I64 lowering +(rule 2 + (lower_bmask $I128 in_ty val) + (let ((res ValueRegs (lower_bmask $I64 in_ty val)) + (res Gpr (value_regs_get_gpr res 0))) + (value_regs res res))) + + +;; Call the lower_bmask rule that does all the processing +(rule (lower (has_type out_ty (bmask x @ (value_type in_ty)))) + (lower_bmask out_ty in_ty x)) + ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. 
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 759eb5e589..b277aa104a 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -467,7 +467,8 @@ fn lower_insn_to_regs( | Opcode::TlsValue | Opcode::SqmulRoundSat | Opcode::Uunarrow - | Opcode::Nop => { + | Opcode::Nop + | Opcode::Bmask => { let ty = if outputs.len() > 0 { Some(ctx.output_ty(insn, 0)) } else { @@ -496,8 +497,6 @@ fn lower_insn_to_regs( unimplemented!("or-not / xor-not opcodes not implemented"); } - Opcode::Bmask => unimplemented!("Bmask not implemented"), - Opcode::Vsplit | Opcode::Vconcat => { unimplemented!("Vector split/concat ops not implemented."); } diff --git a/cranelift/filetests/filetests/isa/x64/bmask.clif b/cranelift/filetests/filetests/isa/x64/bmask.clif new file mode 100644 index 0000000000..390d5a8678 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/bmask.clif @@ -0,0 +1,440 @@ +test compile precise-output +set enable_llvm_abi_extensions +target x86_64 + + +function %bmask_i64_i64(i64) -> i64 { +block0(v0: i64): + v1 = bmask.i64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negq %rcx, %rcx +; movq %rdi, %rax +; sbbq %rax, %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i64_i32(i64) -> i32 { +block0(v0: i64): + v1 = bmask.i32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negq %rcx, %rcx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i64_i16(i64) -> i16 { +block0(v0: i64): + v1 = bmask.i16 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negq %rcx, %rcx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i64_i8(i64) -> i8 { +block0(v0: i64): + v1 = bmask.i8 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negq 
%rcx, %rcx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i32_i64(i32) -> i64 { +block0(v0: i32): + v1 = bmask.i64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negl %ecx, %ecx +; movq %rdi, %rax +; sbbq %rax, %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i32_i32(i32) -> i32 { +block0(v0: i32): + v1 = bmask.i32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negl %ecx, %ecx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i32_i16(i32) -> i16 { +block0(v0: i32): + v1 = bmask.i16 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negl %ecx, %ecx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i32_i8(i32) -> i8 { +block0(v0: i32): + v1 = bmask.i8 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negl %ecx, %ecx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i16_i64(i16) -> i64 { +block0(v0: i16): + v1 = bmask.i64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negw %cx, %cx +; movq %rdi, %rax +; sbbq %rax, %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i16_i32(i16) -> i32 { +block0(v0: i16): + v1 = bmask.i32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negw %cx, %cx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i16_i16(i16) -> i16 { +block0(v0: i16): + v1 = bmask.i16 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negw %cx, %cx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i16_i8(i16) -> i8 { +block0(v0: i16): + v1 = bmask.i8 v0 + return v1 
+} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negw %cx, %cx +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i8_i64(i8) -> i64 { +block0(v0: i8): + v1 = bmask.i64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negb %cl, %cl +; movq %rdi, %rax +; sbbq %rax, %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i8_i32(i8) -> i32 { +block0(v0: i8): + v1 = bmask.i32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negb %cl, %cl +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i8_i16(i8) -> i16 { +block0(v0: i8): + v1 = bmask.i16 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negb %cl, %cl +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i8_i8(i8) -> i8 { +block0(v0: i8): + v1 = bmask.i8 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; negb %cl, %cl +; movq %rdi, %rax +; sbbl %eax, %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i128_i128(i128) -> i128 { +block0(v0: i128): + v1 = bmask.i128 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; orq %rdx, %rsi, %rdx +; movq %rdx, %r10 +; negq %r10, %r10 +; sbbq %rdx, %rdx, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i128_i64(i128) -> i64 { +block0(v0: i128): + v1 = bmask.i64 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; orq %rax, %rsi, %rax +; movq %rax, %r9 +; negq %r9, %r9 +; sbbq %rax, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i128_i32(i128) -> i32 { +block0(v0: i128): + v1 = bmask.i32 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; orq %rax, %rsi, %rax +; movq %rax, %r9 +; negq 
%r9, %r9 +; sbbl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i128_i16(i128) -> i16 { +block0(v0: i128): + v1 = bmask.i16 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; orq %rax, %rsi, %rax +; movq %rax, %r9 +; negq %r9, %r9 +; sbbl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i128_i8(i128) -> i8 { +block0(v0: i128): + v1 = bmask.i8 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; orq %rax, %rsi, %rax +; movq %rax, %r9 +; negq %r9, %r9 +; sbbl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i64_i128(i64) -> i128 { +block0(v0: i64): + v1 = bmask.i128 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; negq %rdx, %rdx +; movq %rdi, %rdx +; sbbq %rdx, %rdi, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i32_i128(i32) -> i128 { +block0(v0: i32): + v1 = bmask.i128 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; negl %edx, %edx +; movq %rdi, %rdx +; sbbq %rdx, %rdi, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i16_i128(i16) -> i128 { +block0(v0: i16): + v1 = bmask.i128 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; negw %dx, %dx +; movq %rdi, %rdx +; sbbq %rdx, %rdi, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %bmask_i8_i128(i8) -> i128 { +block0(v0: i8): + v1 = bmask.i128 v0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; negb %dl, %dl +; movq %rdi, %rdx +; sbbq %rdx, %rdi, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/runtests/bmask.clif b/cranelift/filetests/filetests/runtests/bmask.clif index e762bbd078..e7791c510f 100644 --- a/cranelift/filetests/filetests/runtests/bmask.clif +++ 
b/cranelift/filetests/filetests/runtests/bmask.clif @@ -1,5 +1,6 @@ test interpret test run +target x86_64 target aarch64 target s390x target riscv64 diff --git a/cranelift/filetests/filetests/runtests/i128-bmask.clif b/cranelift/filetests/filetests/runtests/i128-bmask.clif index 9336742b27..ffd6d0f9bc 100644 --- a/cranelift/filetests/filetests/runtests/i128-bmask.clif +++ b/cranelift/filetests/filetests/runtests/i128-bmask.clif @@ -1,5 +1,7 @@ test interpret test run +set enable_llvm_abi_extensions +target x86_64 target aarch64 target riscv64 target s390x @@ -11,6 +13,10 @@ block0(v0: i128): } ; run: %bmask_i128_i128(1) == -1 ; run: %bmask_i128_i128(0) == 0 +; run: %bmask_i128_i128(0x00000001_00000000_00000000_00000000) == -1 +; run: %bmask_i128_i128(0x00000000_00000001_00000000_00000000) == -1 +; run: %bmask_i128_i128(0x00000000_00000000_00000001_00000000) == -1 +; run: %bmask_i128_i128(0x00000000_00000000_00000000_00000001) == -1 function %bmask_i128_i64(i128) -> i64 { block0(v0: i128): diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 825b73c02c..5113344609 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -646,57 +646,30 @@ const OPCODE_SIGNATURES: &'static [( (Opcode::Popcnt, &[I64], &[I64], insert_opcode), (Opcode::Popcnt, &[I128], &[I128], insert_opcode), // Bmask - // bmask not implemented in some backends: - // x64: https://github.com/bytecodealliance/wasmtime/issues/5106 - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I8], &[I8], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I16], &[I8], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I32], &[I8], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I64], &[I8], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I128], &[I8], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] 
(Opcode::Bmask, &[I8], &[I16], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I16], &[I16], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I32], &[I16], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I64], &[I16], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I128], &[I16], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I8], &[I32], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I16], &[I32], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I32], &[I32], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I64], &[I32], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I128], &[I32], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I8], &[I64], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I16], &[I64], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I32], &[I64], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I64], &[I64], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I128], &[I64], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I8], &[I128], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I16], &[I128], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I32], &[I128], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I64], &[I128], insert_opcode), - #[cfg(not(target_arch = "x86_64"))] (Opcode::Bmask, &[I128], &[I128], insert_opcode), // Fadd (Opcode::Fadd, &[F32, F32], &[F32], insert_opcode),