Add Intel encodings for shift and rotate instructions.

Jakob Stoklund Olesen
2017-07-12 12:53:41 -07:00
parent 1f52415b17
commit f57c666d8a
4 changed files with 117 additions and 27 deletions


@@ -55,11 +55,14 @@ ebb0:
 [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe
 ; asm: sarl %cl, %ecx
 [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9
-; asm: movl %esi, %ecx
-[-,%rcx] v26 = copy v2 ; bin: 89 f1
-; asm: movl %ecx, %esi
-[-,%rsi] v27 = copy v1 ; bin: 89 ce
+; asm: roll %cl, %esi
+[-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6
+; asm: roll %cl, %ecx
+[-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1
+; asm: rorl %cl, %esi
+[-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce
+; asm: rorl %cl, %ecx
+[-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9

 ; Integer Register - Immediate 8-bit operations.
 ; The 8-bit immediate is sign-extended.
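All of the shift and rotate forms above share the 0xd3 opcode; the operation is selected by the reg field of the ModRM byte and the shifted register by its r/m field. A small decoding sketch (an illustration, not part of the diff) that checks the "bin:" bytes in this hunk:

    # Decode the ModRM byte of a "d3 /r" shift-group instruction,
    # register-direct form only. The reg field is an opcode extension.
    GPR32 = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi']
    SHIFT_GROUP = {0: 'rol', 1: 'ror', 4: 'shl', 5: 'shr', 7: 'sar'}

    def decode_d3(modrm):
        assert modrm >> 6 == 0b11, 'register-direct operand expected'
        op = SHIFT_GROUP[(modrm >> 3) & 7]   # which shift/rotate
        rm = GPR32[modrm & 7]                # register being shifted
        return '%sl %%cl, %%%s' % (op, rm)

    assert decode_d3(0xc6) == 'roll %cl, %esi'   # bin: d3 c6
    assert decode_d3(0xc9) == 'rorl %cl, %ecx'   # bin: d3 c9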
@@ -102,6 +105,13 @@ ebb0:
 ; asm: xorl $1000000, %esi
 [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240
+
+; Register copies.
+; asm: movl %esi, %ecx
+[-,%rcx] v80 = copy v2 ; bin: 89 f1
+; asm: movl %ecx, %esi
+[-,%rsi] v81 = copy v1 ; bin: 89 ce
+
 ; Load/Store instructions.
 ; Register indirect addressing with no displacement.


@@ -65,13 +65,27 @@ ebb0:
 ; asm: xorq %rcx, %r10
 [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca
-; asm: movq %rsi, %rcx
-[-,%rcx] v60 = copy v2 ; bin: 48 89 f1
-; asm: movq %r10, %rsi
-[-,%rsi] v61 = copy v3 ; bin: 4c 89 d6
-; asm: movq %rcx, %r10
-[-,%r10] v62 = copy v1 ; bin: 49 89 ca
+; asm: shlq %cl, %rsi
+[-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6
+; asm: shlq %cl, %r10
+[-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2
+; asm: sarq %cl, %rsi
+[-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe
+; asm: sarq %cl, %r10
+[-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa
+; asm: shrq %cl, %rsi
+[-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee
+; asm: shrq %cl, %r10
+[-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea
+; asm: rolq %cl, %rsi
+[-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6
+; asm: rolq %cl, %r10
+[-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2
+; asm: rorq %cl, %rsi
+[-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce
+; asm: rorq %cl, %r10
+[-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca

 ; Integer Register-Immediate Operations.
 ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
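The %rsi and %r10 encodings above differ only in the REX byte. As a sketch (again, not part of the diff): REX is 0b0100WRXB, and for these register-direct shifts only W (64-bit operand size) and B (bit 3 of the r/m register number) come into play:

    # Build the REX prefix for a register-direct d3-group shift.
    def rex(w, reg_num):
        # 0b0100WRXB; R and X stay clear, B extends the r/m register.
        return 0x40 | (w << 3) | (reg_num >> 3)

    assert rex(w=1, reg_num=6) == 0x48    # shlq %cl, %rsi  -> bin: 48 d3 e6
    assert rex(w=1, reg_num=10) == 0x49   # shlq %cl, %r10  -> bin: 49 d3 e2
    assert rex(w=0, reg_num=10) == 0x41   # shll %cl, %r10d -> bin: 41 d3 e2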
@@ -122,6 +136,15 @@ ebb0:
 ; asm: xorq $-100, %r14
 [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c
+
+; Register copies.
+; asm: movq %rsi, %rcx
+[-,%rcx] v110 = copy v2 ; bin: 48 89 f1
+; asm: movq %r10, %rsi
+[-,%rsi] v111 = copy v3 ; bin: 4c 89 d6
+; asm: movq %rcx, %r10
+[-,%r10] v112 = copy v1 ; bin: 49 89 ca
+
 return ; bin: c3
 }
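The register copies use opcode 0x89 (mov r/m, reg), which puts the source in the ModRM reg field, so here REX.R rather than REX.B reaches the extended registers. A decoding sketch for the 64-bit copies above (register-direct, REX.W forms only; an illustration, not from the commit):

    # Decode "REX 89 /r" register-to-register copies, 64-bit form.
    GPR64 = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
             'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']

    def decode_movq(rex, modrm):
        assert rex & 0x48 == 0x48, 'REX.W expected'
        src = ((rex & 4) << 1) | ((modrm >> 3) & 7)  # REX.R extends reg (source)
        dst = ((rex & 1) << 3) | (modrm & 7)         # REX.B extends r/m (dest)
        return 'movq %%%s, %%%s' % (GPR64[src], GPR64[dst])

    assert decode_movq(0x48, 0xf1) == 'movq %rsi, %rcx'  # bin: 48 89 f1
    assert decode_movq(0x4c, 0xd6) == 'movq %r10, %rsi'  # bin: 4c 89 d6
    assert decode_movq(0x49, 0xca) == 'movq %rcx, %r10'  # bin: 49 89 ca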
@@ -187,13 +210,27 @@ ebb0:
 ; asm: xorl %ecx, %r10d
 [-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca
-; asm: movl %esi, %ecx
-[-,%rcx] v60 = copy v2 ; bin: 40 89 f1
-; asm: movl %r10d, %esi
-[-,%rsi] v61 = copy v3 ; bin: 44 89 d6
-; asm: movl %ecx, %r10d
-[-,%r10] v62 = copy v1 ; bin: 41 89 ca
+; asm: shll %cl, %esi
+[-,%rsi] v60 = ishl v2, v1 ; bin: 40 d3 e6
+; asm: shll %cl, %r10d
+[-,%r10] v61 = ishl v3, v1 ; bin: 41 d3 e2
+; asm: sarl %cl, %esi
+[-,%rsi] v62 = sshr v2, v1 ; bin: 40 d3 fe
+; asm: sarl %cl, %r10d
+[-,%r10] v63 = sshr v3, v1 ; bin: 41 d3 fa
+; asm: shrl %cl, %esi
+[-,%rsi] v64 = ushr v2, v1 ; bin: 40 d3 ee
+; asm: shrl %cl, %r10d
+[-,%r10] v65 = ushr v3, v1 ; bin: 41 d3 ea
+; asm: roll %cl, %esi
+[-,%rsi] v66 = rotl v2, v1 ; bin: 40 d3 c6
+; asm: roll %cl, %r10d
+[-,%r10] v67 = rotl v3, v1 ; bin: 41 d3 c2
+; asm: rorl %cl, %esi
+[-,%rsi] v68 = rotr v2, v1 ; bin: 40 d3 ce
+; asm: rorl %cl, %r10d
+[-,%r10] v69 = rotr v3, v1 ; bin: 41 d3 ca

 ; Integer Register-Immediate Operations.
 ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
@@ -244,5 +281,14 @@ ebb0:
 ; asm: xorl $-100, %r14d
 [-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c
+
+; Register copies.
+; asm: movl %esi, %ecx
+[-,%rcx] v110 = copy v2 ; bin: 40 89 f1
+; asm: movl %r10d, %esi
+[-,%rsi] v111 = copy v3 ; bin: 44 89 d6
+; asm: movl %ecx, %r10d
+[-,%r10] v112 = copy v1 ; bin: 41 89 ca
+
 return ; bin: c3
 }


@@ -1,7 +1,10 @@
 ; Test basic code generation for i32 arithmetic WebAssembly instructions.
 test compile
-set is_64bit
+set is_64bit=0
+isa intel
+
+set is_64bit=1
 isa intel

 ; Constants.
@@ -55,8 +58,32 @@ ebb0(v0: i32, v1: i32):
     return v2
 }

-; function %i32_shl(i32, i32) -> i32
-; function %i32_shr_s(i32, i32) -> i32
-; function %i32_shr_u(i32, i32) -> i32
-; function %i32_rotl(i32, i32) -> i32
-; function %i32_rotr(i32, i32) -> i32
+function %i32_shl(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = ishl v0, v1
+    return v2
+}
+
+function %i32_shr_s(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = sshr v0, v1
+    return v2
+}
+
+function %i32_shr_u(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = ushr v0, v1
+    return v2
+}
+
+function %i32_rotl(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = rotl v0, v1
+    return v2
+}
+
+function %i32_rotr(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = rotr v0, v1
+    return v2
+}


@@ -66,12 +66,19 @@ I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
 # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
 I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))

-# 32-bit shifts and rotates.
+# Shifts and rotates.
 # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
 # and 16-bit shifts would need explicit masking.
-I32.enc(base.ishl.i32.i32, *r.rc(0xd3, rrr=4))
-I32.enc(base.ushr.i32.i32, *r.rc(0xd3, rrr=5))
-I32.enc(base.sshr.i32.i32, *r.rc(0xd3, rrr=7))
+for inst, rrr in [
+        (base.rotl, 0),
+        (base.rotr, 1),
+        (base.ishl, 4),
+        (base.ushr, 5),
+        (base.sshr, 7)]:
+    I32.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr))
+    I64.enc(inst.i64.i64, *r.rc.rex(0xd3, rrr=rrr, w=1))
+    I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr))
+    I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr))

 # Loads and stores.
 I32.enc(base.store.i32.i32, *r.st(0x89))
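The masking comment is why only i32 and i64 get these encodings: the hardware itself masks the CL shift count to 5 bits for 32-bit operations and 6 bits for 64-bit ones, which lines up with shift amounts taken modulo the type width; 8-bit and 16-bit shifts would need an explicit mask first. A runnable sketch of that hardware behavior (my illustration, not from the commit):

    # Model the hardware masking of a variable shift count in CL.
    def hw_shl(value, amount, bits):
        assert bits in (32, 64)
        mask = bits - 1                      # 0x1f or 0x3f
        return (value << (amount & mask)) & ((1 << bits) - 1)

    assert hw_shl(1, 33, 32) == 2            # 33 & 0x1f == 1
    assert hw_shl(1, 33, 64) == 1 << 33      # 64-bit ops mask by 6 bits
    # For i8, hardware would shift by 9 & 0x1f == 9 (result 0), while
    # masking by the type width would give a shift by 1; hence the comment
    # that 8-bit and 16-bit shifts need explicit masking.

Note also that the loop registers both a REX and a non-REX recipe for 32-bit operations in 64-bit mode, presumably so that %r8d-%r15d (which need REX.B) and the shorter no-prefix form for the low registers are both available.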