From d394edcefea3ba8a5d53e218af839f003e43fffd Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 24 Aug 2022 18:31:38 +0100 Subject: [PATCH] x64: Mask shift amounts for small types (#4752) * x64: Mask shift amounts for small types * cranelift: Disable i128 shifts in fuzzer again They are fixed. But we had a bunch of fuzzgen issues come in, and we don't want to accidentally mark them as fixed * cranelift: Avoid masking shifts for 32 and 64 bit cases * cranelift: Add const shift tests and fix them * cranelift: Remove const `rotl` cases Now that `put_masked_in_imm8_gpr` works properly, we can simplify rotl/rotr --- cranelift/codegen/src/isa/x64/inst.isle | 7 +- cranelift/codegen/src/isa/x64/lower.isle | 10 - cranelift/codegen/src/isa/x64/lower/isle.rs | 21 +- .../filetests/filetests/isa/x64/i128.clif | 4 +- .../filetests/filetests/isa/x64/ishl.clif | 547 +++++++++++++++++ .../filetests/filetests/isa/x64/sshr.clif | 566 ++++++++++++++++++ .../filetests/filetests/isa/x64/ushr.clif | 557 +++++++++++++++++ .../runtests/i128-shifts-small-types.clif | 86 --- .../filetests/runtests/i128-shifts.clif | 77 +++ .../filetests/filetests/runtests/shifts.clif | 162 +++++ 10 files changed, 1920 insertions(+), 117 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/ishl.clif create mode 100644 cranelift/filetests/filetests/isa/x64/sshr.clif create mode 100644 cranelift/filetests/filetests/isa/x64/ushr.clif delete mode 100644 cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 724c1529f7..1a9d50a7a2 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1057,7 +1057,12 @@ ;; ;; This is used when lowering various shifts and rotates. (decl put_masked_in_imm8_gpr (Value Type) Imm8Gpr) -(extern constructor put_masked_in_imm8_gpr put_masked_in_imm8_gpr) +(rule (put_masked_in_imm8_gpr (u64_from_iconst amt) ty) + (const_to_type_masked_imm8 amt ty)) +(rule (put_masked_in_imm8_gpr amt (fits_in_16 ty)) + (x64_and $I64 (value_regs_get_gpr amt 0) (RegMemImm.Imm (shift_mask ty)))) +(rule (put_masked_in_imm8_gpr amt ty) + (value_regs_get_gpr amt 0)) ;; Condition codes (type CC extern diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 72fadf17c8..f0646390e7 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -793,11 +793,6 @@ (rule (lower (has_type (fits_in_64 ty) (rotl src amt))) (x64_rotl ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (fits_in_64 ty) - (rotl src (u64_from_iconst amt)))) - (x64_rotl ty src - (const_to_type_masked_imm8 amt ty))) - ;; `i128`. @@ -819,11 +814,6 @@ (rule (lower (has_type (fits_in_64 ty) (rotr src amt))) (x64_rotr ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (fits_in_64 ty) - (rotr src (u64_from_iconst amt)))) - (x64_rotr ty src - (const_to_type_masked_imm8 amt ty))) - ;; `i128`. 
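The rules added to `put_masked_in_imm8_gpr` above capture why the masking is needed: CLIF takes a shift amount modulo the shifted type's bit width, while the x64 SHL/SHR/SAR family masks the count register to 5 bits (6 for 64-bit operations). That hardware behaviour matches CLIF for i32 and i64, but not for i8 and i16, so amounts for types that fit in 16 bits get an explicit `and` with `shift_mask(ty)` (i.e. `ty.lane_bits() - 1`), and constant amounts are folded at compile time through `const_to_type_masked_imm8`. A minimal Rust sketch of that masking, using hypothetical helper names rather than the actual Cranelift ones:

    /// Sketch only: mirrors `shift_mask` for power-of-two lane widths.
    fn shift_mask(lane_bits: u32) -> u32 {
        debug_assert!(lane_bits.is_power_of_two());
        lane_bits - 1
    }

    /// How a constant amount becomes an 8-bit immediate:
    /// `ishl_imm.i16 v0, 17` ends up shifting by 17 & 15 = 1.
    fn const_to_masked_imm8(amount: u64, lane_bits: u32) -> u8 {
        (amount & shift_mask(lane_bits) as u64) as u8
    }

    /// For non-constant i8/i16 amounts an explicit `and` is emitted, since the
    /// hardware only applies the matching mask for 32/64-bit shifts.
    fn masked_variable_shl_i16(value: u16, amount: u64) -> u16 {
        let amt = (amount & shift_mask(16) as u64) as u32;
        value.wrapping_shl(amt)
    }

    fn main() {
        assert_eq!(const_to_masked_imm8(17, 16), 1);
        assert_eq!(const_to_masked_imm8(65, 64), 1);
        assert_eq!(masked_variable_shl_i16(0x0004, 17), 0x0008);
    }
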
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index b4cdd3c708..cf86de35ac 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -154,23 +154,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { RegMem::reg(self.put_in_reg(val)) } - fn put_masked_in_imm8_gpr(&mut self, val: Value, ty: Type) -> Imm8Gpr { - let inputs = self.lower_ctx.get_value_as_source_or_const(val); - - if let Some(c) = inputs.constant { - let mask = 1_u64.checked_shl(ty.bits()).map_or(u64::MAX, |x| x - 1); - return Imm8Gpr::new(Imm8Reg::Imm8 { - imm: (c & mask) as u8, - }) - .unwrap(); - } - - Imm8Gpr::new(Imm8Reg::Reg { - reg: self.put_in_regs(val).regs()[0], - }) - .unwrap() - } - #[inline] fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 { imm.encode() @@ -272,7 +255,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { #[inline] fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr { - let mask = 1_u64.checked_shl(ty.bits()).map_or(u64::MAX, |x| x - 1); + let mask = self.shift_mask(ty) as u64; Imm8Gpr::new(Imm8Reg::Imm8 { imm: (c & mask) as u8, }) @@ -281,6 +264,8 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { #[inline] fn shift_mask(&mut self, ty: Type) -> u32 { + debug_assert!(ty.lane_bits().is_power_of_two()); + ty.lane_bits() - 1 } diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 565905cc69..af1bf15c45 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -867,8 +867,8 @@ block0(v0: i8, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rsi, %r9 -; movq %r9, %rcx +; movq %rsi, %rcx +; andq %rcx, $7, %rcx ; shlb %cl, %dil, %dil ; movq %rdi, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/ishl.clif b/cranelift/filetests/filetests/isa/x64/ishl.clif new file mode 100644 index 0000000000..4577d7c024 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/ishl.clif @@ -0,0 +1,547 @@ +test compile precise-output +set enable_llvm_abi_extensions=true +target x86_64 + + + +function %ishl_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 + + v4 = ishl.i128 v0, v3 + + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbq %dl, %rax +; movq %rax, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movq %rcx, %r8 +; movl $64, %ecx +; subq %rcx, %r8, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r8 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r8 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ishl.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r9 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ishl.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 
%rdx, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r9 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ishl.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r9 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ishl.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r9 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ishl.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; shlq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = ishl.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; shll %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ishl.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shlw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ishl.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shlq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ishl.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shlq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = ishl.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shlq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = ishl.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx 
+; shlq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = ishl.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shll %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shll %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = ishl.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shll %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = ishl.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shll %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ishl.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shlw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ishl.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shlw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shlw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ishl.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shlw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ishl.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ishl.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ishl.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = ishl_imm.i64 v0, 65 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shlq $1, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + 
+function %ishl_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = ishl_imm.i32 v0, 33 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shll $1, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = ishl_imm.i16 v0, 17 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shlw $1, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ishl_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = ishl_imm.i8 v0, 9 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shlb $1, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/sshr.clif b/cranelift/filetests/filetests/isa/x64/sshr.clif new file mode 100644 index 0000000000..0c8f1dd44e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/sshr.clif @@ -0,0 +1,566 @@ +test compile precise-output +set enable_llvm_abi_extensions=true +target x86_64 + + +function %sshr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 + + v4 = sshr.i128 v0, v3 + + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbq %dl, %rdx +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; sarq %cl, %r9, %r9 +; movl $64, %ecx +; subq %rcx, %rdx, %rcx +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 +; xorq %r10, %r10, %r10 +; testq $127, %rdx +; cmovzq %r10, %r8, %r8 +; orq %rdi, %r8, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %rdx +; movq %r9, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %r9, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = sshr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; movq %rsi, %rax +; shlq %cl, %rax, %rax +; xorq %r8, %r8, %r8 +; testq $127, %r10 +; cmovzq %r8, %rax, %rax +; orq %rdi, %rax, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r10 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = sshr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; movq %rsi, %rax +; shlq %cl, %rax, %rax +; xorq %r8, %r8, %r8 +; testq $127, %r10 +; cmovzq %r8, %rax, %rax +; orq %rdi, %rax, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r10 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = sshr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; movq %rsi, %rax +; shlq %cl, %rax, %rax +; xorq %r8, %r8, %r8 +; testq $127, %r10 +; cmovzq %r8, %rax, %rax +; orq %rdi, %rax, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r10 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, 
%rsp +; popq %rbp +; ret + +function %sshr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = sshr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; movq %rsi, %rax +; shlq %cl, %rax, %rax +; xorq %r8, %r8, %r8 +; testq $127, %r10 +; cmovzq %r8, %rax, %rax +; orq %rdi, %rax, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r10 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = sshr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; sarq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; sarl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = sshr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; sarw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = sshr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; sarb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sshr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = sshr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = sshr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sshr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = sshr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; 
popq %rbp +; ret + +function %sshr_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = sshr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; sarl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = sshr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; sarw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = sshr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; sarw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; sarw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = sshr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; sarw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = sshr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; sarb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = sshr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; sarb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sshr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; sarb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; sarb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + + +function %sshr_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = sshr_imm.i64 v0, 65 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; sarq $1, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = sshr_imm.i32 v0, 33 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; sarl $1, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = sshr_imm.i16 v0, 17 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; sarw $1, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %sshr_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = sshr_imm.i8 v0, 9 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; sarb $1, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/ushr.clif b/cranelift/filetests/filetests/isa/x64/ushr.clif new file mode 100644 index 0000000000..8f7f7800ce 
--- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/ushr.clif @@ -0,0 +1,557 @@ +test compile precise-output +set enable_llvm_abi_extensions=true +target x86_64 + + +function %ushr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 + + v4 = ushr.i128 v0, v3 + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbq %dl, %rdx +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; shrq %cl, %r9, %r9 +; movl $64, %ecx +; movq %rdx, %r10 +; subq %rcx, %r10, %rcx +; shlq %cl, %rsi, %rsi +; xorq %r8, %r8, %r8 +; testq $127, %r10 +; cmovzq %r8, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r10 +; movq %r9, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r9, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ushr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rax, %rax, %rax +; testq $127, %r10 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r10 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ushr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rax, %rax, %rax +; testq $127, %r10 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r10 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ushr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rax, %rax, %rax +; testq $127, %r10 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r10 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ushr.i128 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movq %rcx, %rax +; movl $64, %ecx +; movq %rax, %r10 +; subq %rcx, %r10, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rax, %rax, %rax +; testq $127, %r10 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r10 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ushr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; shrq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_i128(i32, i64, i64) -> i32 { +block0(v0: i32, v1: 
i64, v2: i64): + v3 = iconcat v1, v2 + v4 = ushr.i32 v0, v3 + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ushr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shrw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ushr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shrb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = ushr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = ushr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = ushr.i64 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrq %cl, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = ushr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = ushr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = ushr.i32 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; shrl %cl, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ushr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shrw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ushr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shrw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 
%rsi, %rcx +; andq %rcx, $15, %rcx +; shrw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ushr.i16 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $15, %rcx +; shrw %cl, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ushr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shrb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ushr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shrb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ushr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shrb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %rcx +; andq %rcx, $7, %rcx +; shrb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + + + +function %ushr_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = ushr_imm.i64 v0, 65 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shrq $1, %rdi, %rdi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = ushr_imm.i32 v0, 33 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shrl $1, %edi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = ushr_imm.i16 v0, 17 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shrw $1, %di, %di +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %ushr_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = ushr_imm.i8 v0, 9 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; shrb $1, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif deleted file mode 100644 index 64fa59c441..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif +++ /dev/null @@ -1,86 +0,0 @@ -test interpret -test run -target aarch64 -target s390x - -; TODO: Merge this with the main i128-shifts file when x86_64 passes these. 
- -function %ishl_i16_i128(i16, i128) -> i16 { -block0(v0: i16, v1: i128): - v2 = ishl.i16 v0, v1 - return v2 -} -; run: %ishl_i16_i128(0x0000, 0) == 0x0000 -; run: %ishl_i16_i128(0x0000, 1) == 0x0000 -; run: %ishl_i16_i128(0x000f, 4) == 0x00f0 -; run: %ishl_i16_i128(0x0004, 16) == 0x0004 -; run: %ishl_i16_i128(0x0004, 17) == 0x0008 -; run: %ishl_i16_i128(0x000f, 0x00000000_00000004_00000000_00000000) == 0x000f -; run: %ishl_i16_i128(0x0004, 0x00000000_00000001_00000000_00000012) == 0x0010 - -function %ishl_i8_i128(i8, i128) -> i8 { -block0(v0: i8, v1: i128): - v2 = ishl.i8 v0, v1 - return v2 -} -; run: %ishl_i8_i128(0x00, 0) == 0x00 -; run: %ishl_i8_i128(0x00, 1) == 0x00 -; run: %ishl_i8_i128(0x0f, 4) == 0xf0 -; run: %ishl_i8_i128(0x04, 8) == 0x04 -; run: %ishl_i8_i128(0x04, 9) == 0x08 -; run: %ishl_i8_i128(0x0f, 0x00000000_00000004_00000000_00000000) == 0x0f -; run: %ishl_i8_i128(0x04, 0x00000000_00000001_00000000_0000000A) == 0x10 - - -function %ushr_i16_i128(i16, i128) -> i16 { -block0(v0: i16, v1: i128): - v2 = ushr.i16 v0, v1 - return v2 -} -; run: %ushr_i16_i128(0x1000, 0) == 0x1000 -; run: %ushr_i16_i128(0x1000, 1) == 0x0800 -; run: %ushr_i16_i128(0xf000, 4) == 0x0f00 -; run: %ushr_i16_i128(0x4000, 16) == 0x4000 -; run: %ushr_i16_i128(0x4000, 17) == 0x2000 -; run: %ushr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 -; run: %ushr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 - -function %ushr_i8_i128(i8, i128) -> i8 { -block0(v0: i8, v1: i128): - v2 = ushr.i8 v0, v1 - return v2 -} -; run: %ushr_i8_i128(0x10, 0) == 0x10 -; run: %ushr_i8_i128(0x10, 1) == 0x08 -; run: %ushr_i8_i128(0xf0, 4) == 0x0f -; run: %ushr_i8_i128(0x40, 8) == 0x40 -; run: %ushr_i8_i128(0x40, 9) == 0x20 -; run: %ushr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 -; run: %ushr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 - - -function %sshr_i16_i128(i16, i128) -> i16 { -block0(v0: i16, v1: i128): - v2 = sshr.i16 v0, v1 - return v2 -} -; run: %sshr_i16_i128(0x8000, 0) == 0x8000 -; run: %sshr_i16_i128(0x8000, 1) == 0xC000 -; run: %sshr_i16_i128(0xf000, 4) == 0xff00 -; run: %sshr_i16_i128(0x4000, 16) == 0x4000 -; run: %sshr_i16_i128(0x4000, 17) == 0x2000 -; run: %sshr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 -; run: %sshr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 - -function %sshr_i8_i128(i8, i128) -> i8 { -block0(v0: i8, v1: i128): - v2 = sshr.i8 v0, v1 - return v2 -} -; run: %sshr_i8_i128(0x80, 0) == 0x80 -; run: %sshr_i8_i128(0x80, 1) == 0xC0 -; run: %sshr_i8_i128(0xf0, 4) == 0xff -; run: %sshr_i8_i128(0x40, 8) == 0x40 -; run: %sshr_i8_i128(0x40, 9) == 0x20 -; run: %sshr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 -; run: %sshr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 272c241f44..ec0247a775 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -127,6 +127,31 @@ block0(v0: i32, v1: i128): ; run: %ishl_i32_i128(0x0000000f, 0x00000000_00000004_00000000_00000000) == 0x0000000f ; run: %ishl_i32_i128(0x00000004, 0x00000000_00000001_00000000_00000022) == 0x00000010 +function %ishl_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i128(0x0000, 0) == 0x0000 +; run: %ishl_i16_i128(0x0000, 1) == 
0x0000 +; run: %ishl_i16_i128(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i128(0x0004, 16) == 0x0004 +; run: %ishl_i16_i128(0x0004, 17) == 0x0008 +; run: %ishl_i16_i128(0x000f, 0x00000000_00000004_00000000_00000000) == 0x000f +; run: %ishl_i16_i128(0x0004, 0x00000000_00000001_00000000_00000012) == 0x0010 + +function %ishl_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i128(0x00, 0) == 0x00 +; run: %ishl_i8_i128(0x00, 1) == 0x00 +; run: %ishl_i8_i128(0x0f, 4) == 0xf0 +; run: %ishl_i8_i128(0x04, 8) == 0x04 +; run: %ishl_i8_i128(0x04, 9) == 0x08 +; run: %ishl_i8_i128(0x0f, 0x00000000_00000004_00000000_00000000) == 0x0f +; run: %ishl_i8_i128(0x04, 0x00000000_00000001_00000000_0000000A) == 0x10 function %ushr_i128_i128(i128, i8) -> i128 { @@ -245,6 +270,32 @@ block0(v0: i32, v1: i64, v2: i64): ; run: %ushr_i32_i128(0x40000000, 34, 1) == 0x10000000 +function %ushr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i128(0x1000, 0) == 0x1000 +; run: %ushr_i16_i128(0x1000, 1) == 0x0800 +; run: %ushr_i16_i128(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i128(0x4000, 16) == 0x4000 +; run: %ushr_i16_i128(0x4000, 17) == 0x2000 +; run: %ushr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %ushr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 + +function %ushr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i128(0x10, 0) == 0x10 +; run: %ushr_i8_i128(0x10, 1) == 0x08 +; run: %ushr_i8_i128(0xf0, 4) == 0x0f +; run: %ushr_i8_i128(0x40, 8) == 0x40 +; run: %ushr_i8_i128(0x40, 9) == 0x20 +; run: %ushr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %ushr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 + function %sshr_i128_i128(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -353,3 +404,29 @@ block0(v0: i32, v1: i128): ; run: %sshr_i32_i128(0x40000000, 33) == 0x20000000 ; run: %sshr_i32_i128(0xf0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000 ; run: %sshr_i32_i128(0x40000000, 0x00000000_00000001_00000000_00000022) == 0x10000000 + +function %sshr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = sshr.i16 v0, v1 + return v2 +} +; run: %sshr_i16_i128(0x8000, 0) == 0x8000 +; run: %sshr_i16_i128(0x8000, 1) == 0xC000 +; run: %sshr_i16_i128(0xf000, 4) == 0xff00 +; run: %sshr_i16_i128(0x4000, 16) == 0x4000 +; run: %sshr_i16_i128(0x4000, 17) == 0x2000 +; run: %sshr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %sshr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 + +function %sshr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i128(0x80, 0) == 0x80 +; run: %sshr_i8_i128(0x80, 1) == 0xC0 +; run: %sshr_i8_i128(0xf0, 4) == 0xff +; run: %sshr_i8_i128(0x40, 8) == 0x40 +; run: %sshr_i8_i128(0x40, 9) == 0x20 +; run: %sshr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %sshr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index 9fd7657534..224ac500d1 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -120,6 +120,9 @@ block0(v0: i16, v1: i64): ; run: %ishl_i16_i64(0x0000, 1) == 0x0000 ; run: %ishl_i16_i64(0x000f, 0) == 0x000f ; run: 
%ishl_i16_i64(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i64(0x0004, 16) == 0x0004 +; run: %ishl_i16_i64(0x0004, 17) == 0x0008 +; run: %ishl_i16_i64(0x0004, 18) == 0x0010 ; run: %ishl_i16_i64(0x0004, 32) == 0x0004 ; run: %ishl_i16_i64(0x0004, 33) == 0x0008 ; run: %ishl_i16_i64(0x0004, 34) == 0x0010 @@ -133,6 +136,9 @@ block0(v0: i16, v1: i32): ; run: %ishl_i16_i32(0x0000, 1) == 0x0000 ; run: %ishl_i16_i32(0x000f, 0) == 0x000f ; run: %ishl_i16_i32(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i32(0x0004, 16) == 0x0004 +; run: %ishl_i16_i32(0x0004, 17) == 0x0008 +; run: %ishl_i16_i32(0x0004, 18) == 0x0010 ; run: %ishl_i16_i32(0x0004, 32) == 0x0004 ; run: %ishl_i16_i32(0x0004, 33) == 0x0008 ; run: %ishl_i16_i32(0x0004, 34) == 0x0010 @@ -146,6 +152,9 @@ block0(v0: i16, v1: i16): ; run: %ishl_i16_i16(0x0000, 1) == 0x0000 ; run: %ishl_i16_i16(0x000f, 0) == 0x000f ; run: %ishl_i16_i16(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i16(0x0004, 16) == 0x0004 +; run: %ishl_i16_i16(0x0004, 17) == 0x0008 +; run: %ishl_i16_i16(0x0004, 18) == 0x0010 ; run: %ishl_i16_i16(0x0004, 32) == 0x0004 ; run: %ishl_i16_i16(0x0004, 33) == 0x0008 ; run: %ishl_i16_i16(0x0004, 34) == 0x0010 @@ -159,6 +168,9 @@ block0(v0: i16, v1: i8): ; run: %ishl_i16_i8(0x0000, 1) == 0x0000 ; run: %ishl_i16_i8(0x000f, 0) == 0x000f ; run: %ishl_i16_i8(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i8(0x0004, 16) == 0x0004 +; run: %ishl_i16_i8(0x0004, 17) == 0x0008 +; run: %ishl_i16_i8(0x0004, 18) == 0x0010 ; run: %ishl_i16_i8(0x0004, 32) == 0x0004 ; run: %ishl_i16_i8(0x0004, 33) == 0x0008 ; run: %ishl_i16_i8(0x0004, 34) == 0x0010 @@ -173,6 +185,9 @@ block0(v0: i8, v1: i64): ; run: %ishl_i8_i64(0x00, 1) == 0x00 ; run: %ishl_i8_i64(0x0f, 0) == 0x0f ; run: %ishl_i8_i64(0x0f, 4) == 0xf0 +; run: %ishl_i8_i64(0x04, 8) == 0x04 +; run: %ishl_i8_i64(0x04, 9) == 0x08 +; run: %ishl_i8_i64(0x04, 10) == 0x10 ; run: %ishl_i8_i64(0x04, 32) == 0x04 ; run: %ishl_i8_i64(0x04, 33) == 0x08 ; run: %ishl_i8_i64(0x04, 34) == 0x10 @@ -186,6 +201,9 @@ block0(v0: i8, v1: i32): ; run: %ishl_i8_i32(0x00, 1) == 0x00 ; run: %ishl_i8_i32(0x0f, 0) == 0x0f ; run: %ishl_i8_i32(0x0f, 4) == 0xf0 +; run: %ishl_i8_i32(0x04, 8) == 0x04 +; run: %ishl_i8_i32(0x04, 9) == 0x08 +; run: %ishl_i8_i32(0x04, 10) == 0x10 ; run: %ishl_i8_i32(0x04, 32) == 0x04 ; run: %ishl_i8_i32(0x04, 33) == 0x08 ; run: %ishl_i8_i32(0x04, 34) == 0x10 @@ -199,6 +217,9 @@ block0(v0: i8, v1: i16): ; run: %ishl_i8_i16(0x00, 1) == 0x00 ; run: %ishl_i8_i16(0x0f, 0) == 0x0f ; run: %ishl_i8_i16(0x0f, 4) == 0xf0 +; run: %ishl_i8_i16(0x04, 8) == 0x04 +; run: %ishl_i8_i16(0x04, 9) == 0x08 +; run: %ishl_i8_i16(0x04, 10) == 0x10 ; run: %ishl_i8_i16(0x04, 32) == 0x04 ; run: %ishl_i8_i16(0x04, 33) == 0x08 ; run: %ishl_i8_i16(0x04, 34) == 0x10 @@ -212,6 +233,9 @@ block0(v0: i8, v1: i8): ; run: %ishl_i8_i8(0x00, 1) == 0x00 ; run: %ishl_i8_i8(0x0f, 0) == 0x0f ; run: %ishl_i8_i8(0x0f, 4) == 0xf0 +; run: %ishl_i8_i8(0x04, 8) == 0x04 +; run: %ishl_i8_i8(0x04, 9) == 0x08 +; run: %ishl_i8_i8(0x04, 10) == 0x10 ; run: %ishl_i8_i8(0x04, 32) == 0x04 ; run: %ishl_i8_i8(0x04, 33) == 0x08 ; run: %ishl_i8_i8(0x04, 34) == 0x10 @@ -332,6 +356,9 @@ block0(v0: i16, v1: i64): ; run: %ushr_i16_i64(0x1000, 1) == 0x0800 ; run: %ushr_i16_i64(0xf000, 0) == 0xf000 ; run: %ushr_i16_i64(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i64(0x4000, 16) == 0x4000 +; run: %ushr_i16_i64(0x4000, 17) == 0x2000 +; run: %ushr_i16_i64(0x4000, 18) == 0x1000 ; run: %ushr_i16_i64(0x4000, 32) == 0x4000 ; run: %ushr_i16_i64(0x4000, 33) == 0x2000 ; run: %ushr_i16_i64(0x4000, 34) == 0x1000 @@ 
-345,6 +372,9 @@ block0(v0: i16, v1: i32): ; run: %ushr_i16_i32(0x1000, 1) == 0x0800 ; run: %ushr_i16_i32(0xf000, 0) == 0xf000 ; run: %ushr_i16_i32(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i32(0x4000, 16) == 0x4000 +; run: %ushr_i16_i32(0x4000, 17) == 0x2000 +; run: %ushr_i16_i32(0x4000, 18) == 0x1000 ; run: %ushr_i16_i32(0x4000, 32) == 0x4000 ; run: %ushr_i16_i32(0x4000, 33) == 0x2000 ; run: %ushr_i16_i32(0x4000, 34) == 0x1000 @@ -358,6 +388,9 @@ block0(v0: i16, v1: i16): ; run: %ushr_i16_i16(0x1000, 1) == 0x0800 ; run: %ushr_i16_i16(0xf000, 0) == 0xf000 ; run: %ushr_i16_i16(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i16(0x4000, 16) == 0x4000 +; run: %ushr_i16_i16(0x4000, 17) == 0x2000 +; run: %ushr_i16_i16(0x4000, 18) == 0x1000 ; run: %ushr_i16_i16(0x4000, 32) == 0x4000 ; run: %ushr_i16_i16(0x4000, 33) == 0x2000 ; run: %ushr_i16_i16(0x4000, 34) == 0x1000 @@ -371,6 +404,9 @@ block0(v0: i16, v1: i8): ; run: %ushr_i16_i8(0x1000, 1) == 0x0800 ; run: %ushr_i16_i8(0xf000, 0) == 0xf000 ; run: %ushr_i16_i8(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i8(0x4000, 16) == 0x4000 +; run: %ushr_i16_i8(0x4000, 17) == 0x2000 +; run: %ushr_i16_i8(0x4000, 18) == 0x1000 ; run: %ushr_i16_i8(0x4000, 32) == 0x4000 ; run: %ushr_i16_i8(0x4000, 33) == 0x2000 ; run: %ushr_i16_i8(0x4000, 34) == 0x1000 @@ -385,6 +421,9 @@ block0(v0: i8, v1: i64): ; run: %ushr_i8_i64(0x10, 1) == 0x08 ; run: %ushr_i8_i64(0xf0, 0) == 0xf0 ; run: %ushr_i8_i64(0xf0, 4) == 0x0f +; run: %ushr_i8_i64(0x40, 8) == 0x40 +; run: %ushr_i8_i64(0x40, 9) == 0x20 +; run: %ushr_i8_i64(0x40, 10) == 0x10 ; run: %ushr_i8_i64(0x40, 32) == 0x40 ; run: %ushr_i8_i64(0x40, 33) == 0x20 ; run: %ushr_i8_i64(0x40, 34) == 0x10 @@ -398,6 +437,9 @@ block0(v0: i8, v1: i32): ; run: %ushr_i8_i32(0x10, 1) == 0x08 ; run: %ushr_i8_i32(0xf0, 0) == 0xf0 ; run: %ushr_i8_i32(0xf0, 4) == 0x0f +; run: %ushr_i8_i32(0x40, 8) == 0x40 +; run: %ushr_i8_i32(0x40, 9) == 0x20 +; run: %ushr_i8_i32(0x40, 10) == 0x10 ; run: %ushr_i8_i32(0x40, 32) == 0x40 ; run: %ushr_i8_i32(0x40, 33) == 0x20 ; run: %ushr_i8_i32(0x40, 34) == 0x10 @@ -411,6 +453,9 @@ block0(v0: i8, v1: i16): ; run: %ushr_i8_i16(0x10, 1) == 0x08 ; run: %ushr_i8_i16(0xf0, 0) == 0xf0 ; run: %ushr_i8_i16(0xf0, 4) == 0x0f +; run: %ushr_i8_i16(0x40, 8) == 0x40 +; run: %ushr_i8_i16(0x40, 9) == 0x20 +; run: %ushr_i8_i16(0x40, 10) == 0x10 ; run: %ushr_i8_i16(0x40, 32) == 0x40 ; run: %ushr_i8_i16(0x40, 33) == 0x20 ; run: %ushr_i8_i16(0x40, 34) == 0x10 @@ -424,6 +469,9 @@ block0(v0: i8, v1: i8): ; run: %ushr_i8_i8(0x10, 1) == 0x08 ; run: %ushr_i8_i8(0xf0, 0) == 0xf0 ; run: %ushr_i8_i8(0xf0, 4) == 0x0f +; run: %ushr_i8_i8(0x40, 8) == 0x40 +; run: %ushr_i8_i8(0x40, 9) == 0x20 +; run: %ushr_i8_i8(0x40, 10) == 0x10 ; run: %ushr_i8_i8(0x40, 32) == 0x40 ; run: %ushr_i8_i8(0x40, 33) == 0x20 ; run: %ushr_i8_i8(0x40, 34) == 0x10 @@ -544,6 +592,9 @@ block0(v0: i16, v1: i64): ; run: %sshr_i16_i64(0x8000, 1) == 0xC000 ; run: %sshr_i16_i64(0xf000, 0) == 0xf000 ; run: %sshr_i16_i64(0xf000, 4) == 0xff00 +; run: %sshr_i16_i64(0x4000, 16) == 0x4000 +; run: %sshr_i16_i64(0x4000, 17) == 0x2000 +; run: %sshr_i16_i64(0x4000, 18) == 0x1000 ; run: %sshr_i16_i64(0x4000, 32) == 0x4000 ; run: %sshr_i16_i64(0x4000, 33) == 0x2000 ; run: %sshr_i16_i64(0x4000, 34) == 0x1000 @@ -557,6 +608,9 @@ block0(v0: i16, v1: i32): ; run: %sshr_i16_i32(0x8000, 1) == 0xC000 ; run: %sshr_i16_i32(0xf000, 0) == 0xf000 ; run: %sshr_i16_i32(0xf000, 4) == 0xff00 +; run: %sshr_i16_i32(0x4000, 16) == 0x4000 +; run: %sshr_i16_i32(0x4000, 17) == 0x2000 +; run: %sshr_i16_i32(0x4000, 18) == 0x1000 ; 
run: %sshr_i16_i32(0x4000, 32) == 0x4000 ; run: %sshr_i16_i32(0x4000, 33) == 0x2000 ; run: %sshr_i16_i32(0x4000, 34) == 0x1000 @@ -570,6 +624,9 @@ block0(v0: i16, v1: i16): ; run: %sshr_i16_i16(0x8000, 1) == 0xC000 ; run: %sshr_i16_i16(0xf000, 0) == 0xf000 ; run: %sshr_i16_i16(0xf000, 4) == 0xff00 +; run: %sshr_i16_i16(0x4000, 16) == 0x4000 +; run: %sshr_i16_i16(0x4000, 17) == 0x2000 +; run: %sshr_i16_i16(0x4000, 18) == 0x1000 ; run: %sshr_i16_i16(0x4000, 32) == 0x4000 ; run: %sshr_i16_i16(0x4000, 33) == 0x2000 ; run: %sshr_i16_i16(0x4000, 34) == 0x1000 @@ -583,6 +640,9 @@ block0(v0: i16, v1: i8): ; run: %sshr_i16_i8(0x8000, 1) == 0xC000 ; run: %sshr_i16_i8(0xf000, 0) == 0xf000 ; run: %sshr_i16_i8(0xf000, 4) == 0xff00 +; run: %sshr_i16_i8(0x4000, 16) == 0x4000 +; run: %sshr_i16_i8(0x4000, 17) == 0x2000 +; run: %sshr_i16_i8(0x4000, 18) == 0x1000 ; run: %sshr_i16_i8(0x4000, 32) == 0x4000 ; run: %sshr_i16_i8(0x4000, 33) == 0x2000 ; run: %sshr_i16_i8(0x4000, 34) == 0x1000 @@ -597,6 +657,9 @@ block0(v0: i8, v1: i64): ; run: %sshr_i8_i64(0x80, 1) == 0xC0 ; run: %sshr_i8_i64(0xf0, 0) == 0xf0 ; run: %sshr_i8_i64(0xf0, 4) == 0xff +; run: %sshr_i8_i64(0x40, 8) == 0x40 +; run: %sshr_i8_i64(0x40, 9) == 0x20 +; run: %sshr_i8_i64(0x40, 10) == 0x10 ; run: %sshr_i8_i64(0x40, 32) == 0x40 ; run: %sshr_i8_i64(0x40, 33) == 0x20 ; run: %sshr_i8_i64(0x40, 34) == 0x10 @@ -610,6 +673,9 @@ block0(v0: i8, v1: i32): ; run: %sshr_i8_i32(0x80, 1) == 0xC0 ; run: %sshr_i8_i32(0xf0, 0) == 0xf0 ; run: %sshr_i8_i32(0xf0, 4) == 0xff +; run: %sshr_i8_i32(0x40, 8) == 0x40 +; run: %sshr_i8_i32(0x40, 9) == 0x20 +; run: %sshr_i8_i32(0x40, 10) == 0x10 ; run: %sshr_i8_i32(0x40, 32) == 0x40 ; run: %sshr_i8_i32(0x40, 33) == 0x20 ; run: %sshr_i8_i32(0x40, 34) == 0x10 @@ -623,6 +689,9 @@ block0(v0: i8, v1: i16): ; run: %sshr_i8_i16(0x80, 1) == 0xC0 ; run: %sshr_i8_i16(0xf0, 0) == 0xf0 ; run: %sshr_i8_i16(0xf0, 4) == 0xff +; run: %sshr_i8_i16(0x40, 8) == 0x40 +; run: %sshr_i8_i16(0x40, 9) == 0x20 +; run: %sshr_i8_i16(0x40, 10) == 0x10 ; run: %sshr_i8_i16(0x40, 32) == 0x40 ; run: %sshr_i8_i16(0x40, 33) == 0x20 ; run: %sshr_i8_i16(0x40, 34) == 0x10 @@ -636,6 +705,99 @@ block0(v0: i8, v1: i8): ; run: %sshr_i8_i8(0x80, 1) == 0xC0 ; run: %sshr_i8_i8(0xf0, 0) == 0xf0 ; run: %sshr_i8_i8(0xf0, 4) == 0xff +; run: %sshr_i8_i8(0x40, 8) == 0x40 +; run: %sshr_i8_i8(0x40, 9) == 0x20 +; run: %sshr_i8_i8(0x40, 10) == 0x10 ; run: %sshr_i8_i8(0x40, 32) == 0x40 ; run: %sshr_i8_i8(0x40, 33) == 0x20 ; run: %sshr_i8_i8(0x40, 34) == 0x10 + + + + +function %ishl_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = ishl_imm.i64 v0, 65 + return v1 +} +; run: %ishl_i64_const(0x00000000_00000004) == 0x00000000_00000008 + +function %ishl_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = ishl_imm.i32 v0, 33 + return v1 +} +; run: %ishl_i32_const(0x00000004) == 0x00000008 + +function %ishl_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = ishl_imm.i16 v0, 17 + return v1 +} +; run: %ishl_i16_const(0x0004) == 0x0008 + +function %ishl_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = ishl_imm.i8 v0, 9 + return v1 +} +; run: %ishl_i8_const(0x04) == 0x08 + + + +function %ushr_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = ushr_imm.i64 v0, 65 + return v1 +} +; run: %ushr_i64_const(0x40000000_40000000) == 0x20000000_20000000 + +function %ushr_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = ushr_imm.i32 v0, 33 + return v1 +} +; run: %ushr_i32_const(0x40000000) == 0x20000000 + +function %ushr_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = ushr_imm.i16 v0, 17 + return v1 +} +; run: 
%ushr_i16_const(0x4000) == 0x2000 + +function %ushr_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = ushr_imm.i8 v0, 9 + return v1 +} +; run: %ushr_i8_const(0x40) == 0x20 + + +function %sshr_i64_const(i64) -> i64 { +block0(v0: i64): + v1 = sshr_imm.i64 v0, 65 + return v1 +} +; run: %sshr_i64_const(0x40000000_40000000) == 0x20000000_20000000 + +function %sshr_i32_const(i32) -> i32 { +block0(v0: i32): + v1 = sshr_imm.i32 v0, 33 + return v1 +} +; run: %sshr_i32_const(0x40000000) == 0x20000000 + +function %sshr_i16_const(i16) -> i16 { +block0(v0: i16): + v1 = sshr_imm.i16 v0, 17 + return v1 +} +; run: %sshr_i16_const(0x4000) == 0x2000 + +function %sshr_i8_const(i8) -> i8 { +block0(v0: i8): + v1 = sshr_imm.i8 v0, 9 + return v1 +} +; run: %sshr_i8_const(0x40) == 0x20
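The constant-shift run tests above, and the i128-amount tests earlier in this patch, all follow from the same modulo-type-width rule: `sshr_imm.i8 v0, 9` shifts by 9 mod 8 = 1, and a 128-bit amount such as 0x00000000_00000001_00000000_00000012 reduces to its low bits for an i16 shift. A small sketch (hypothetical helper, not part of the test suite) of how the expected values are derived:

    /// Effective shift amount under CLIF semantics: the full (possibly
    /// 128-bit) amount taken modulo the shifted type's bit width.
    fn effective_amount(amount: u128, lane_bits: u32) -> u32 {
        (amount % lane_bits as u128) as u32
    }

    fn main() {
        // %ishl_i16_i128(0x0004, 0x00000000_00000001_00000000_00000012) == 0x0010
        let amt = (1u128 << 64) | 0x12;
        assert_eq!(effective_amount(amt, 16), 2);
        assert_eq!(0x0004u16 << effective_amount(amt, 16), 0x0010);

        // %sshr_i8_const(0x40) == 0x20: sshr_imm.i8 by 9 is a shift by 1.
        assert_eq!(effective_amount(9, 8), 1);
        assert_eq!(0x40u8 >> effective_amount(9, 8), 0x20);
    }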