Use 'xor r, r' to set registers to 0 instead of mov (#766)

2019-09-16 14:35:55 +00:00
parent b95508c51a
commit 99380fad1a
9 changed files with 193 additions and 7 deletions
--- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif
+++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif
@@ -0,0 +1,17 @@
+; Check that floating-point constants equal to zero are optimized correctly.
+test binemit
+target i686
+
+function %foo() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm0, %xmm0
+  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
+  return v0
+}
+
+function %bar() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm0, %xmm0
+  [-,%xmm0]    v1 = f64const 0.0     ; bin: 66 0f 57 c0
+  return v1
+}
--- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif
+++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif
@@ -0,0 +1,31 @@
+; Check that floating-point constants equal to zero are optimized correctly.
+test binemit
+target x86_64
+
+function %zero_const_32bit_no_rex() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm0, %xmm0
+  [-,%xmm0]    v0 = f32const 0.0     ; bin: 40 0f 57 c0
+  return v0
+}
+
+function %zero_const_32bit_rex() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm8, %xmm8
+  [-,%xmm8]    v1 = f32const 0.0     ; bin: 45 0f 57 c0
+  return v1
+}
+
+function %zero_const_64bit_no_rex() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm0, %xmm0
+  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 40 0f 57 c0
+  return v0
+}
+
+function %zero_const_64bit_rex() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm8, %xmm8
+  [-,%xmm8]    v1 = f64const 0.0     ; bin: 66 45 0f 57 c0
+  return v1
+}
--- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif
+++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif
@@ -5,9 +5,9 @@ function u0:0(i8) -> i8 fast {
 ebb0(v0: i8):
    v1 = iconst.i8 0
    v2 = isub v1, v0
-    ; check: v4 = uextend.i32 v0
-    ; nextln: v6 = iconst.i32 0
-    ; nextln = isub v6, v4
-    ; nextln = ireduce.i8 v5
+    ; check: v3 = uextend.i32 v0
+    ; nextln: v5 = iconst.i32 0
+    ; nextln: v4 = isub v5, v3
+    ; nextln: v2 = ireduce.i8 v4
    return v2
 }
--- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif
+++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif
@@ -1,5 +1,6 @@
-; Check that floating-point constants equal to zero are optimized correctly.
+; Check that floating-point and integer constants equal to zero are optimized correctly.
 test binemit
+set opt_level=best
 target i686

 function %foo() -> f32 fast {
@@ -16,3 +17,36 @@ ebb0:
  return v1
 }

+function %zero_dword() -> i32 fast {
+ebb0:
+  ; asm: xor %eax, %eax
+  [-,%rax]     v0 = iconst.i32 0     ; bin: 31 c0
+  ; asm: xor %edi, %edi
+  [-,%rdi]     v1 = iconst.i32 0     ; bin: 31 ff
+  return v0
+}
+
+function %zero_word() -> i16 fast {
+ebb0:
+  ; while you may expect this to be encoded like 6631c0, aka
+  ; xor %ax, %ax, the upper 16 bits of the register used for
+  ; i16 are left undefined, so it's not wrong to clear them.
+  ;
+  ; discarding the 66 prefix is shorter, so this test expects
+  ; that we do so.
+  ;
+  ; asm: xor %eax, %eax
+  [-,%rax]     v0 = iconst.i16 0     ; bin: 31 c0
+  ; asm: xor %edi, %edi
+  [-,%rdi]     v1 = iconst.i16 0     ; bin: 31 ff
+  return v0
+}
+
+function %zero_byte() -> i8 fast {
+ebb0:
+  ; asm: xor %al, %al
+  [-,%rax]     v0 = iconst.i8 0     ; bin: 30 c0
+  ; asm: xor %bh, %bh
+  [-,%rdi]     v1 = iconst.i8 0     ; bin: 30 ff
+  return v0
+}
--- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif
+++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif
@@ -1,11 +1,12 @@
 ; Check that floating-point constants equal to zero are optimized correctly.
 test binemit
+set opt_level=best
 target x86_64

 function %zero_const_32bit_no_rex() -> f32 fast {
 ebb0:
  ; asm: xorps %xmm0, %xmm0
-  [-,%xmm0]    v0 = f32const 0.0     ; bin: 40 0f 57 c0
+  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
  return v0
 }

@@ -19,7 +20,7 @@ ebb0:
 function %zero_const_64bit_no_rex() -> f64 fast {
 ebb0:
  ; asm: xorpd %xmm0, %xmm0
-  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 40 0f 57 c0
+  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 0f 57 c0
  return v0
 }

@@ -30,3 +31,42 @@ ebb0:
  return v1
 }

+function %imm_zero_register() -> i64 fast {
+ebb0:
+  ; asm: xor %eax, %eax
+  [-,%rax]     v0 = iconst.i64 0     ; bin: 31 c0
+  ; asm: xor %edi, %edi
+  [-,%rdi]     v1 = iconst.i64 0     ; bin: 31 ff
+  ; asm: xor %r8d, %r8d
+  [-,%r8]      v2 = iconst.i64 0     ; bin: 45 31 c0
+  ; asm: xor %r15d, %r15d
+  [-,%r15]     v4 = iconst.i64 0     ; bin: 45 31 ff
+  return v0
+}
+
+function %zero_word() -> i16 fast {
+ebb0:
+  ; while you may expect this to be encoded like 6631c0, aka
+  ; xor %ax, %ax, the upper 16 bits of the register used for
+  ; i16 are left undefined, so it's not wrong to clear them.
+  ;
+  ; discarding the 66 prefix is shorter, so this test expects
+  ; that we do so.
+  ;
+  ; asm: xor %eax, %eax
+  [-,%rax]     v0 = iconst.i16 0     ; bin: 31 c0
+  ; asm: xor %edi, %edi
+  [-,%rdi]     v1 = iconst.i16 0     ; bin: 31 ff
+  return v0
+}
+
+function %zero_byte() -> i8 fast {
+ebb0:
+  ; asm: xor %r15b, %r15b
+  [-,%r15]     v0 = iconst.i8 0     ; bin: 45 30 ff
+  ; asm: xor %al, %al
+  [-,%rax]     v1 = iconst.i8 0     ; bin: 30 c0
+  ; asm: xor %bh, %bh
+  [-,%rdi]     v2 = iconst.i8 0     ; bin: 30 ff
+  return v0
+}