From 690ea640b3280a4989e284d330b9d865f26fdcf5 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 26 Jul 2021 19:47:15 +0200 Subject: [PATCH 1/4] Implement fmin_pseudo and fmax_pseudo for scalars --- cranelift/codegen/src/isa/x64/lower.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 234ba3d3f6..6bd6ea7b72 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4413,6 +4413,10 @@ fn lower_insn_to_regs>( let ty = ty.unwrap(); ctx.emit(Inst::gen_move(dst, rhs, ty)); let sse_opcode = match (ty, op) { + (types::F32, Opcode::FminPseudo) => SseOpcode::Minss, + (types::F32, Opcode::FmaxPseudo) => SseOpcode::Maxss, + (types::F64, Opcode::FminPseudo) => SseOpcode::Minsd, + (types::F64, Opcode::FmaxPseudo) => SseOpcode::Maxsd, (types::F32X4, Opcode::FminPseudo) => SseOpcode::Minps, (types::F32X4, Opcode::FmaxPseudo) => SseOpcode::Maxps, (types::F64X2, Opcode::FminPseudo) => SseOpcode::Minpd, From a6598c310a84d4c707f1be9d990be11d71ad147c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 27 Jul 2021 12:00:42 +0200 Subject: [PATCH 2/4] Remove empty preopt.serialized file It was added in #2312 --- cranelift/codegen/src/preopt.serialized | Bin 5511 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cranelift/codegen/src/preopt.serialized diff --git a/cranelift/codegen/src/preopt.serialized b/cranelift/codegen/src/preopt.serialized deleted file mode 100644 index 353373491a3b2e704f137c67f4e115f2b251b001..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5511 zcmcIoS8fzR5S_{>#n)K!idnc&(8+5fb1r<22!GMR2D0!+|M9Vgerp^svPwV_W(*{aAr!Uv}4gtZ{6HV$% zoIc4_WM$Es??hZv=gg}t*7-X#mCv+uzsC&lI_2FM{T;Xa_L3FZ=f))7^0jK?N2%pk zf0$Zn{3x~jdJR*nE*{HJlCMicyqC7V7TNFHF9*2KBU!nsQ$x!%MtnBtHw>TVGpy~S z?|;zwP)#H%a~`A+NPSV29LG~TguzcNk)0cU4j5Ta{C**7A)C+h436AwJ0c)>|Jkk9 zxmq^@nmaxUZ@i+6w>O;4#JaQF_LR;*>1xuK>-@NY;Od7a&0d@~Tay!zntHRj``%^= z48-1i4a&LO?&rmhm^XLZ)A&(py;74UuWI>oWAm@R;n+LI&yeS4l&T&(%na+|)wr)q zTfB2caaMsfKIe*XKfNQt`TPv?_369l;_a^K^ed9QIf~JWgjn3gn8EOMZFkhAk8X$W z_^O#^%Xi3VQociaeNA?F8DqQ-nQ0nB#2&6l37qr2l-MSrU%>F4q_fv=D*pBw6w6;r zq?;IwwKb+ivQ_Gozy5BRBRqNjNw+Z0WB8t0$&TD+Am|RpISgONR?v2#UDja~t>#gk zd$8o*2=ucDp&%CbG0sqM^Z@c{r&D~0p+9`&dITX?kDn72S=Gldqo7482l@6F;@yYkAkLKQ1$k)*JF2 Date: Fri, 27 Aug 2021 16:59:04 +0200 Subject: [PATCH 3/4] Add tests --- .../filetests/runtests/fmin-max-pseudo.clif | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif new file mode 100644 index 0000000000..15a716298e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif @@ -0,0 +1,81 @@ +test run +; target s390x TODO: Not yet implemented on s390x +set enable_simd +target x86_64 machinst skylake + +function %fmin_pseudo_f32(f32, f32) -> f32 { +block0(v0:f32, v1:f32): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f32(0x1.0, 0x2.0) == 0x1.0 +; run: %fmin_pseudo_f32(NaN, 0x2.0) == NaN +; run: %fmin_pseudo_f32(0x0.1, NaN) == 0x0.1 +; run: %fmin_pseudo_f32(0x0.0, -0x0.0) == 0x0.0 +; run: %fmin_pseudo_f32(-0x0.0, 0x0.0) == -0x0.0 + +function %fmax_pseudo_f32(f32, f32) -> f32 { +block0(v0:f32, v1:f32): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f32(0x1.0, 0x2.0) == 0x2.0 +; run: %fmax_pseudo_f32(NaN, 0x2.0) == NaN +; run: %fmax_pseudo_f32(0x0.1, NaN) == 0x0.1 +; run: %fmax_pseudo_f32(0x0.0, 0x0.0) == 0x0.0 +; run: %fmax_pseudo_f32(-0x0.0, 0x0.0) == -0x0.0 + +function %fmin_pseudo_f64(f64, f64) -> f64 { +block0(v0:f64, v1:f64): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f64(0x1.0, 0x2.0) == 0x1.0 +; run: %fmin_pseudo_f64(NaN, 0x2.0) == NaN +; run: %fmin_pseudo_f64(0x0.1, NaN) == 0x0.1 +; run: %fmin_pseudo_f64(0x0.0, -0x0.0) == 0x0.0 +; run: %fmin_pseudo_f64(-0x0.0, 0x0.0) == -0x0.0 + +function %fmax_pseudo_f64(f64, f64) -> f64 { +block0(v0:f64, v1:f64): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f64(0x1.0, 0x2.0) == 0x2.0 +; run: %fmax_pseudo_f64(NaN, 0x2.0) == NaN +; run: %fmax_pseudo_f64(0x0.1, NaN) == 0x0.1 +; run: %fmax_pseudo_f64(0x0.0, 0x0.0) == 0x0.0 +; run: %fmax_pseudo_f64(-0x0.0, 0x0.0) == -0x0.0 + +target aarch64 ; TODO scalar fmin_pseudo and fmax_pseudo are unimplemented for AArch64 + +function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x1.0 NaN 0x0.1 -0x0.0] + +function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x2.0 NaN 0x0.1 -0x0.0] + +function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x1.0 NaN] +; run: %fmin_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] + +function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x2.0 NaN] +; run: %fmax_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] + From b79e59882d8eb682602a332a7322b6314220ec1c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Aug 2021 18:28:33 +0200 Subject: [PATCH 4/4] Fix tests --- .../runtests/fmin-max-pseudo-vector.clif | 36 +++++++++++++++++++ .../filetests/runtests/fmin-max-pseudo.clif | 33 +---------------- 2 files changed, 37 insertions(+), 32 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif new file mode 100644 index 0000000000..5bd7b07ada --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif @@ -0,0 +1,36 @@ +test run +; target s390x TODO: Not yet implemented on s390x +set enable_simd +target aarch64 +target x86_64 machinst skylake + +function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x1.0 NaN 0x0.1 -0x0.0] + +function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x2.0 NaN 0x0.1 -0x0.0] + +function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmin_pseudo v0, v1 + return v2 +} +; run: %fmin_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x1.0 NaN] +; run: %fmin_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] + +function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmax_pseudo v0, v1 + return v2 +} +; run: %fmax_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x2.0 NaN] +; run: %fmax_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] + diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif index 15a716298e..a1273f9063 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif @@ -1,5 +1,6 @@ test run ; target s390x TODO: Not yet implemented on s390x +; target aarch64 TODO: Not yet implemented on aarch64 set enable_simd target x86_64 machinst skylake @@ -47,35 +48,3 @@ block0(v0:f64, v1:f64): ; run: %fmax_pseudo_f64(0x0.0, 0x0.0) == 0x0.0 ; run: %fmax_pseudo_f64(-0x0.0, 0x0.0) == -0x0.0 -target aarch64 ; TODO scalar fmin_pseudo and fmax_pseudo are unimplemented for AArch64 - -function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { -block0(v0:f32x4, v1:f32x4): - v2 = fmin_pseudo v0, v1 - return v2 -} -; run: %fmin_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x1.0 NaN 0x0.1 -0x0.0] - -function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 { -block0(v0:f32x4, v1:f32x4): - v2 = fmax_pseudo v0, v1 - return v2 -} -; run: %fmax_pseudo_f32x4([0x1.0 NaN 0x0.1 -0x0.0], [0x2.0 0x2.0 NaN 0x0.0]) == [0x2.0 NaN 0x0.1 -0x0.0] - -function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 { -block0(v0:f64x2, v1:f64x2): - v2 = fmin_pseudo v0, v1 - return v2 -} -; run: %fmin_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x1.0 NaN] -; run: %fmin_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] - -function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 { -block0(v0:f64x2, v1:f64x2): - v2 = fmax_pseudo v0, v1 - return v2 -} -; run: %fmax_pseudo_f64x2([0x1.0 NaN], [0x2.0 0x2.0]) == [0x2.0 NaN] -; run: %fmax_pseudo_f64x2([0x0.1 -0x0.0], [NaN 0x0.0]) == [0x0.1 -0x0.0] -