diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 14f050c2e7..4cc14b6905 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -55,11 +55,14 @@ ebb0: [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe ; asm: sarl %cl, %ecx [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9 - - ; asm: movl %esi, %ecx - [-,%rcx] v26 = copy v2 ; bin: 89 f1 - ; asm: movl %ecx, %esi - [-,%rsi] v27 = copy v1 ; bin: 89 ce + ; asm: roll %cl, %esi + [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6 + ; asm: roll %cl, %ecx + [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1 + ; asm: rorl %cl, %esi + [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce + ; asm: rorl %cl, %ecx + [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9 ; Integer Register - Immediate 8-bit operations. ; The 8-bit immediate is sign-extended. @@ -102,6 +105,13 @@ ebb0: ; asm: xorl $1000000, %esi [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 + ; Register copies. + + ; asm: movl %esi, %ecx + [-,%rcx] v80 = copy v2 ; bin: 89 f1 + ; asm: movl %ecx, %esi + [-,%rsi] v81 = copy v1 ; bin: 89 ce + ; Load/Store instructions. ; Register indirect addressing with no displacement. diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 48e328b455..e9ee5d3941 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -65,13 +65,27 @@ ebb0: ; asm: xorq %rcx, %r10 [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca - ; asm: movq %rsi, %rcx - [-,%rcx] v60 = copy v2 ; bin: 48 89 f1 - ; asm: movq %r10, %rsi - [-,%rsi] v61 = copy v3 ; bin: 4c 89 d6 - ; asm: movq %rcx, %r10 - [-,%r10] v62 = copy v1 ; bin: 49 89 ca + ; asm: shlq %cl, %rsi + [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 + ; asm: shlq %cl, %r10 + [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 + ; asm: sarq %cl, %rsi + [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe + ; asm: sarq %cl, %r10 + [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa + ; asm: shrq %cl, %rsi + [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee + ; asm: shrq %cl, %r10 + [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea + ; asm: rolq %cl, %rsi + [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 + ; asm: rolq %cl, %r10 + [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 + ; asm: rorq %cl, %rsi + [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce + ; asm: rorq %cl, %r10 + [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca ; Integer Register-Immediate Operations. ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. @@ -122,6 +136,15 @@ ebb0: ; asm: xorq $-100, %r14 [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c + ; Register copies. 
+ + ; asm: movq %rsi, %rcx + [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 + ; asm: movq %r10, %rsi + [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 + ; asm: movq %rcx, %r10 + [-,%r10] v112 = copy v1 ; bin: 49 89 ca + return ; bin: c3 } @@ -187,13 +210,27 @@ ebb0: ; asm: xorl %ecx, %r10d [-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca - ; asm: movl %esi, %ecx - [-,%rcx] v60 = copy v2 ; bin: 40 89 f1 - ; asm: movl %r10d, %esi - [-,%rsi] v61 = copy v3 ; bin: 44 89 d6 - ; asm: movl %ecx, %r10d - [-,%r10] v62 = copy v1 ; bin: 41 89 ca + ; asm: shll %cl, %esi + [-,%rsi] v60 = ishl v2, v1 ; bin: 40 d3 e6 + ; asm: shll %cl, %r10d + [-,%r10] v61 = ishl v3, v1 ; bin: 41 d3 e2 + ; asm: sarl %cl, %esi + [-,%rsi] v62 = sshr v2, v1 ; bin: 40 d3 fe + ; asm: sarl %cl, %r10d + [-,%r10] v63 = sshr v3, v1 ; bin: 41 d3 fa + ; asm: shrl %cl, %esi + [-,%rsi] v64 = ushr v2, v1 ; bin: 40 d3 ee + ; asm: shrl %cl, %r10d + [-,%r10] v65 = ushr v3, v1 ; bin: 41 d3 ea + ; asm: roll %cl, %esi + [-,%rsi] v66 = rotl v2, v1 ; bin: 40 d3 c6 + ; asm: roll %cl, %r10d + [-,%r10] v67 = rotl v3, v1 ; bin: 41 d3 c2 + ; asm: rorl %cl, %esi + [-,%rsi] v68 = rotr v2, v1 ; bin: 40 d3 ce + ; asm: rorl %cl, %r10d + [-,%r10] v69 = rotr v3, v1 ; bin: 41 d3 ca ; Integer Register-Immediate Operations. ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. @@ -244,5 +281,14 @@ ebb0: ; asm: xorl $-100, %r14d [-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c + ; Register copies. + + ; asm: movl %esi, %ecx + [-,%rcx] v110 = copy v2 ; bin: 40 89 f1 + ; asm: movl %r10d, %esi + [-,%rsi] v111 = copy v3 ; bin: 44 89 d6 + ; asm: movl %ecx, %r10d + [-,%r10] v112 = copy v1 ; bin: 41 89 ca + return ; bin: c3 } diff --git a/cranelift/filetests/wasm/i32-arith.cton b/cranelift/filetests/wasm/i32-arith.cton index 1096d70ddb..f2fafffee3 100644 --- a/cranelift/filetests/wasm/i32-arith.cton +++ b/cranelift/filetests/wasm/i32-arith.cton @@ -1,7 +1,10 @@ ; Test basic code generation for i32 arithmetic WebAssembly instructions. test compile -set is_64bit +set is_64bit=0 +isa intel + +set is_64bit=1 isa intel ; Constants. @@ -55,8 +58,32 @@ ebb0(v0: i32, v1: i32): return v2 } -; function %i32_shl(i32, i32) -> i32 -; function %i32_shr_s(i32, i32) -> i32 -; function %i32_shr_u(i32, i32) -> i32 -; function %i32_rotl(i32, i32) -> i32 -; function %i32_rotr(i32, i32) -> i32 +function %i32_shl(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = ishl v0, v1 + return v2 +} + +function %i32_shr_s(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = sshr v0, v1 + return v2 +} + +function %i32_shr_u(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = ushr v0, v1 + return v2 +} + +function %i32_rotl(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = rotl v0, v1 + return v2 +} + +function %i32_rotr(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = rotr v0, v1 + return v2 +} diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index ab32e77303..df6631fe4c 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -66,12 +66,19 @@ I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) -# 32-bit shifts and rotates. +# Shifts and rotates. # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # and 16-bit shifts would need explicit masking. 
-I32.enc(base.ishl.i32.i32, *r.rc(0xd3, rrr=4)) -I32.enc(base.ushr.i32.i32, *r.rc(0xd3, rrr=5)) -I32.enc(base.sshr.i32.i32, *r.rc(0xd3, rrr=7)) +for inst, rrr in [ + (base.rotl, 0), + (base.rotr, 1), + (base.ishl, 4), + (base.ushr, 5), + (base.sshr, 7)]: + I32.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr)) + I64.enc(inst.i64.i64, *r.rc.rex(0xd3, rrr=rrr, w=1)) + I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr)) + I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr)) # Loads and stores. I32.enc(base.store.i32.i32, *r.st(0x89))
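
Note (not part of the patch): the tests and the encodings.py loop above all revolve around the single 0xd3 opcode group, where the ModRM reg field (the rrr value) selects the operation, the ModRM r/m field names the shifted register, and the count is implicitly %cl. The following Python sketch reproduces that byte layout and checks it against a few of the "; bin:" annotations in the test files above; the helper names and register table here are illustrative only and do not exist in the Cretonne code base.

# Illustrative sketch of the 0xd3 shift/rotate group encoding used by the
# rc recipe. Not part of the patch; helper names are hypothetical.

# rrr values, matching the loop added in encodings.py.
GROUP_D3 = {'rotl': 0, 'rotr': 1, 'ishl': 4, 'ushr': 5, 'sshr': 7}

# Hardware register numbers: the low 3 bits go in ModRM.r/m, bit 3 in REX.B.
GPR = {'rcx': 1, 'rsi': 6, 'r10': 10}


def encode_shift(inst, reg, w=0, force_rex=False):
    """Return the bytes for `inst reg, %cl` (register-register form)."""
    rrr = GROUP_D3[inst]
    num = GPR[reg]
    modrm = 0xC0 | (rrr << 3) | (num & 7)          # mod=11, reg=rrr, r/m=reg
    rex = 0x40 | (w << 3) | (num >> 3)              # REX.W and REX.B as needed
    prefix = [rex] if (w or num >= 8 or force_rex) else []
    return bytes(prefix + [0xD3] + [modrm])


# Cross-check against the "; bin:" values in the tests above.
assert encode_shift('rotl', 'rsi') == bytes([0xd3, 0xc6])                # roll %cl, %esi
assert encode_shift('rotr', 'rcx') == bytes([0xd3, 0xc9])                # rorl %cl, %ecx
assert encode_shift('ishl', 'r10', w=1) == bytes([0x49, 0xd3, 0xe2])     # shlq %cl, %r10
assert encode_shift('sshr', 'rsi', w=1) == bytes([0x48, 0xd3, 0xfe])     # sarq %cl, %rsi
assert encode_shift('ushr', 'rsi', force_rex=True) == bytes([0x40, 0xd3, 0xee])  # shrl %cl, %esi (rc.rex)

This is also why the loop emits two I64 encodings for the i32 forms: one plain rc recipe (no REX byte, as in binary32.cton) and one rc.rex recipe that forces a REX prefix so registers like %r10d remain reachable.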