x64: port select to ISLE (#3682)

* x64: port `select` using an FP comparison to ISLE. This change includes quite a few interlocking parts, required mainly by the current x64 conventions in ISLE: - it adds a way to emit a `cmove` with multiple OR-ing conditions; because x64 ISLE cannot currently safely emit a comparison followed by several jumps, this adds `MachInst::CmoveOr` and `MachInst::XmmCmoveOr` macro instructions. Unfortunately, these macro instructions hide the multi-instruction sequence in `lower.isle` - to properly keep track of what instructions consume and produce flags, @cfallin added a way to pass around variants of `ConsumesFlags` and `ProducesFlags`--these changes affect all backends - then, to lower the `fcmp + select` CLIF, this change adds several `cmove*_from_values` helpers that perform all of the awkward conversions between `Value`, `ValueReg`, `Reg`, and `Gpr/Xmm`; one upside is that now these lowerings have much-improved documentation explaining why the various `FloatCC` and `CC` choices are made the way they are. Co-authored-by: Chris Fallin <chris@cfallin.org>
2022-02-23 10:03:16 -08:00
parent 5a5e401a9c
commit f87c61176a
20 changed files with 3163 additions and 2272 deletions
--- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
@@ -43,7 +43,7 @@ block0(v0: f64, v1: i64):
 ;   Entry block: 0
 ; Block 0:
 ;   (original IR block: block0)
-;   (instruction range: 0 .. 17)
+;   (instruction range: 0 .. 16)
 ;   Inst 0:   pushq   %rbp
 ;   Inst 1:   movq    %rsp, %rbp
 ;   Inst 2:   movsd   0(%rdi), %xmm1
@@ -52,14 +52,12 @@ block0(v0: f64, v1: i64):
 ;   Inst 5:   setz    %sil
 ;   Inst 6:   andl    %edi, %esi
 ;   Inst 7:   andq    $1, %rsi
-;   Inst 8:   ucomisd %xmm1, %xmm0
+;   Inst 8:   ucomisd %xmm0, %xmm1
 ;   Inst 9:   movaps  %xmm0, %xmm1
-;   Inst 10:   jnp $next; movsd %xmm0, %xmm1; $next: 
-;   Inst 11:   jz $next; movsd %xmm0, %xmm1; $next: 
-;   Inst 12:   movq    %rsi, %rax
-;   Inst 13:   movaps  %xmm1, %xmm0
-;   Inst 14:   movq    %rbp, %rsp
-;   Inst 15:   popq    %rbp
-;   Inst 16:   ret
+;   Inst 10:   jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next
+;   Inst 11:   movq    %rsi, %rax
+;   Inst 12:   movaps  %xmm1, %xmm0
+;   Inst 13:   movq    %rbp, %rsp
+;   Inst 14:   popq    %rbp
+;   Inst 15:   ret
 ; }}
-
--- a/cranelift/filetests/filetests/runtests/select.clif
+++ b/cranelift/filetests/filetests/runtests/select.clif
@@ -0,0 +1,80 @@
+test interpret
+test run
+target x86_64
+
+function %select_eq_f32(f32, f32) -> i32 {
+block0(v0: f32, v1: f32):
+    v2 = fcmp eq v0, v1
+    v3 = iconst.i32 1
+    v4 = iconst.i32 0
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_eq_f32(0x42.42, 0x42.42) == 1
+; run: %select_eq_f32(0x42.42, 0.0) == 0
+; run: %select_eq_f32(0x42.42, NaN) == 0
+
+function %select_ne_f64(f64, f64) -> i32 {
+block0(v0: f64, v1: f64):
+    v2 = fcmp ne v0, v1
+    v3 = iconst.i32 1
+    v4 = iconst.i32 0
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_ne_f64(0x42.42, 0x42.42) == 0
+; run: %select_ne_f64(0x42.42, 0.0) == 1
+; run: %select_ne_f64(NaN, NaN) == 1
+
+function %select_gt_f64(f64, f64) -> b1 {
+block0(v0: f64, v1: f64):
+    v2 = fcmp gt v0, v1
+    v3 = bconst.b1 true
+    v4 = bconst.b1 false
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_gt_f64(0x42.42, 0.0) == true
+; run: %select_gt_f64(0.0, 0.0) == false
+; run: %select_gt_f64(0x0.0, 0x42.42) == false
+; run: %select_gt_f64(NaN, 0x42.42) == false
+
+function %select_ge_f64(f64, f64) -> i64 {
+block0(v0: f64, v1: f64):
+    v2 = fcmp ge v0, v1
+    v3 = iconst.i64 1
+    v4 = iconst.i64 0
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_ge_f64(0x42.42, 0.0) == 1
+; run: %select_ge_f64(0.0, 0.0) == 1
+; run: %select_ge_f64(0x0.0, 0x42.42) == 0
+; run: %select_ge_f64(0x0.0, NaN) == 0
+
+function %select_le_f32(f32, f32) -> f32 {
+block0(v0: f32, v1: f32):
+    v2 = fcmp le v0, v1
+    v3 = f32const 0x1.0
+    v4 = f32const 0x0.0
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_le_f32(0x42.42, 0.0) == 0x0.0
+; run: %select_le_f32(0.0, 0.0) == 0x1.0
+; run: %select_le_f32(0x0.0, 0x42.42) == 0x1.0
+; run: %select_le_f32(0x0.0, NaN) == 0x0.0
+
+function %select_uno_f32(f32, f32) -> i8 {
+block0(v0: f32, v1: f32):
+    v2 = fcmp uno v0, v1
+    v3 = iconst.i8 1
+    v4 = iconst.i8 0
+    v5 = select v2, v3, v4
+    return v5
+}
+; run: %select_uno_f32(0x42.42, 0.0) == 0
+; run: %select_uno_f32(0.0, 0.0) == 0
+; run: %select_uno_f32(0x0.0, 0x42.42) == 0
+; run: %select_uno_f32(0x0.0, NaN) == 1
+; run: %select_uno_f32(-NaN, 0x42.42) == 1