From e021995323f0b43f5649d3b7f90ea0f56b9de8fd Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 8 May 2021 17:11:16 +0100 Subject: [PATCH] Allow i128 amount operands on shift instructions in the x64 backend Fixes #2727. --- cranelift/codegen/src/isa/x64/lower.rs | 2 +- .../filetests/filetests/isa/x64/i128.clif | 216 ++++++++++++++++++ .../filetests/isa/x64/shift-i128-run.clif | 93 ++++++++ 3 files changed, 310 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 6f675b9232..a1969d5642 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2046,7 +2046,7 @@ fn lower_insn_to_regs>( } ctx.emit(Inst::shift_r(size, shift_kind, count, dst)); } else if dst_ty == types::I128 { - let amt_src = put_input_in_reg(ctx, inputs[1]); + let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0]; let src = put_input_in_regs(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]); diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 1b28abfca2..c480b857bb 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -890,3 +890,219 @@ block0(v0: i8, v1: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret + +function %f30(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ishl v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shrq %cl, %rdi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rdi +; nextln: orq %rax, %rdi +; nextln: xorq %rax, %rax +; nextln: andq $$64, %rdx +; nextln: cmovzq %rdi, %rax +; nextln: cmovzq %rsi, %rcx +; nextln: cmovnzq %rsi, %rax +; nextln: movq %rax, %rdx +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f31(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ushr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdi +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shlq %cl, %rdi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rdi +; nextln: orq %rax, %rdi +; nextln: xorq %rax, %rax +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %rdx +; nextln: cmovzq %rsi, %rax +; nextln: cmovzq %rdi, %rcx +; nextln: cmovnzq %rsi, %rcx +; nextln: movq %rax, %rdx +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f32(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = sshr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %r8 +; nextln: movq %rsi, %rdi +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: sarq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %r8 +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rdi, %rax +; nextln: shlq %cl, %rax +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rax +; nextln: orq %r8, %rax +; nextln: sarq $$63, %rdi +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %rdx +; nextln: cmovzq %rsi, %rdi +; nextln: cmovzq %rax, %rcx +; nextln: cmovnzq %rsi, %rcx +; nextln: movq %rcx, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f33(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotl v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %r8 +; nextln: movq %r8, %r9 +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %r9 +; nextln: movq %rsi, %rax +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %r8, %r10 +; nextln: shrq %cl, %r10 +; nextln: xorq %rdi, %rdi +; nextln: testq $$127, %rdx +; nextln: cmovzq %rdi, %r10 +; nextln: orq %rax, %r10 +; nextln: xorq %rax, %rax +; nextln: movq %rdx, %rcx +; nextln: andq $$64, %rcx +; nextln: cmovzq %r10, %rax +; nextln: cmovzq %r9, %rdi +; nextln: cmovnzq %r9, %rax +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; nextln: movq %rsi, %rdx +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %rdx +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %r8 +; nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shlq %cl, %rsi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %r9 +; nextln: cmovzq %rcx, %rsi +; nextln: orq %r8, %rsi +; nextln: xorq %rcx, %rcx +; nextln: xorq %r8, %r8 +; nextln: andq $$64, %r9 +; nextln: cmovzq %rdx, %rcx +; nextln: cmovzq %rsi, %r8 +; nextln: cmovnzq %rdx, %r8 +; nextln: orq %rdi, %r8 +; nextln: orq %rax, %rcx +; nextln: movq %r8, %rax +; nextln: movq %rcx, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + + +function %f34(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %r9 +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %r9 +; nextln: movq %rdi, %rax +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rsi, %r10 +; nextln: shlq %cl, %r10 +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %r10 +; nextln: orq %rax, %r10 +; nextln: xorq %rax, %rax +; nextln: xorq %r8, %r8 +; nextln: movq %rdx, %rcx +; nextln: andq $$64, %rcx +; nextln: cmovzq %r9, %rax +; nextln: cmovzq %r10, %r8 +; nextln: cmovnzq %r9, %r8 +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; nextln: movq %rdi, %rdx +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %rdx +; nextln: movq %rsi, %r10 +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %r10 +; nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shrq %cl, %rdi +; nextln: xorq %rsi, %rsi +; nextln: testq $$127, %r9 +; nextln: cmovzq %rsi, %rdi +; nextln: orq %r10, %rdi +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %r9 +; nextln: cmovzq %rdi, %rcx +; nextln: cmovzq %rdx, %rsi +; nextln: cmovnzq %rdx, %rcx +; nextln: orq %r8, %rsi +; nextln: orq %rax, %rcx +; nextln: movq %rsi, %rax +; nextln: movq %rcx, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret \ No newline at end of file diff --git a/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif b/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif index 3a076ed536..029444eea0 100644 --- a/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif @@ -71,3 +71,96 @@ block0(v0: i64, v1: i64, v2: i8): ; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] ; run: %rotr(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] ; run: %rotr(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] + + +; i128 amount operand tests +function %ishl_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = ishl.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] +; run: %ishl_amt_i128(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ishl_amt_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] +; run: %ishl_amt_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] +; run: %ishl_amt_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] +; run: %ishl_amt_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] +; run: %ishl_amt_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] + +function %ushr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = ushr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ushr_amt_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] +; run: %ushr_amt_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] + +function %sshr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = sshr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %sshr_amt_i128(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] + +function %rotl_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = rotl.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] + +function %rotr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = rotr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] +; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] +; run: %rotr_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] +; run: %rotr_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303]