This PR switches the default backend on x86, for both the `cranelift-codegen` crate and for Wasmtime, to the new (`MachInst`-style, `VCode`-based) backend that has been under development and testing for some time now. The old backend is still available by default in builds with the `old-x86-backend` feature, or by requesting `BackendVariant::Legacy` from the appropriate APIs. As part of that switch, it adds some more runtime-configurable plumbing to the testing infrastructure so that tests can be run using the appropriate backend. `clif-util test` is now capable of parsing a backend selector option from filetests and instantiating the correct backend. CI has been updated so that the old x86 backend continues to run its tests, just as we used to run the new x64 backend separately. At some point, we will remove the old x86 backend entirely, once we are satisfied that the new backend has not caused any unforeseen issues and we do not need to revert.
test run
set enable_simd
target x86_64 machinst skylake
; Lane-wise 32-bit vector addition.
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
    v2 = iadd v0, v1
    return v2
}
; run: %iadd_i32x4([1 1 1 1], [1 2 3 4]) == [2 3 4 5]
|
; Byte lanes wrap on overflow: 0xff + 2 == 1 in every lane.
function %iadd_i8x16_with_overflow() -> i8x16 {
block0:
    v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
    v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
    v2 = iadd v0, v1
    return v2
}
; run: %iadd_i8x16_with_overflow() == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
; Lane-wise 32-bit subtraction, checked lane by lane.
function %isub_i32x4_rex() -> b1 {
block0:
    v0 = vconst.i32x4 [1 1 1 1]
    v1 = vconst.i32x4 [1 2 3 4]
    v2 = isub v0, v1

    ; lane 0: 1 - 1 == 0
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0

    ; lane 1: 1 - 2 == -1 == 0xffffffff
    v5 = extractlane v2, 1
    v6 = icmp_imm eq v5, 0xffffffff
    ; TODO replace extractlanes with vector comparison

    v7 = band v4, v6
    return v7
}
; run
|
; Lane-wise 32-bit negation.
function %ineg_i32x4() -> b1 {
block0:
    v0 = vconst.i32x4 [1 1 1 1]
    v2 = ineg v0

    ; lane 0: -(1) == -1
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, -1

    return v4
}
; run
|
; Lane-wise 64-bit multiplication.
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
    v2 = imul v0, v1
    return v2
}
; run: %imul_i64x2([0 2], [0 2]) == [0 4]
|
; Lane-wise 32-bit multiplication; the high half of each product is dropped.
function %imul_i32x4() -> b1 {
block0:
    v0 = vconst.i32x4 [-1 0 1 0x80_00_00_01]
    v1 = vconst.i32x4 [2 2 2 2]
    v2 = imul v0, v1

    ; lane 0: -1 * 2 == -2
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, -2

    ; lane 1: 0 * 2 == 0
    v5 = extractlane v2, 1
    v6 = icmp_imm eq v5, 0

    v7 = extractlane v2, 3
    v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)

    v9 = band v4, v6
    v10 = band v8, v9
    return v10
}
; run
|
; Lane-wise 16-bit multiplication, checked lane by lane.
function %imul_i16x8() -> b1 {
block0:
    v0 = vconst.i16x8 [-1 0 1 0x7f_ff 0 0 0 0]
    v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
    v2 = imul v0, v1

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is
                                ; being uextend-ed, not sextend-ed

    v5 = extractlane v2, 1
    v6 = icmp_imm eq v5, 0

    v7 = extractlane v2, 3
    v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe

    ; BUG FIX: this used to `return v4`, which silently dropped the lane-1 and
    ; lane-3 checks (v6 and v8). Combine all three checks and return the result.
    v9 = band v4, v6
    v10 = band v8, v9
    return v10
}
; run
|
; Signed saturating byte addition.
function %sadd_sat_i8x16() -> b1 {
block0:
    v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

    ; lane 0 saturates at the signed maximum: 127 + 1 == 127
    v2 = sadd_sat v0, v1
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 127

    return v4
}
; run
|
; Unsigned saturating 16-bit addition.
function %uadd_sat_i16x8() -> b1 {
block0:
    v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
    v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]

    ; lane 0 saturates at the unsigned maximum: 0xffff + 0xffff == 0xffff
    v2 = uadd_sat v0, v1
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 65535

    return v4
}
; run
|
; Signed and unsigned saturating byte subtraction, sharing the 0x80 constant.
function %sub_sat_i8x16() -> b1 {
block0:
    v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
    v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

    ; signed: -128 - 1 saturates at the signed minimum
    v2 = ssub_sat v0, v1
    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128

    ; now re-use 0x80 as an unsigned 128
    v5 = usub_sat v0, v2
    v6 = extractlane v5, 0
    v7 = icmp_imm eq v6, 0

    v8 = band v4, v7
    return v8
}
; run
|
;function %add_sub_f32x4() -> b1 {
;block0:
;    v0 = vconst.f32x4 [0x4.2 0.0 0.0 0.0]
;    v1 = vconst.f32x4 [0x1.0 0x1.0 0x1.0 0x1.0]
;    v2 = vconst.f32x4 [0x5.2 0x1.0 0x1.0 0x1.0]
;
;    v3 = fadd v0, v1
;    v4 = fcmp eq v3, v2
;
;    v6 = fsub v2, v1
;    v7 = fcmp eq v6, v0
;
;    v8 = band v4, v7
;    v9 = vall_true v8
;    return v9
;}
; _run
|
;function %mul_div_f32x4() -> b1 {
;block0:
;    v0 = vconst.f32x4 [0x4.2 -0x2.1 0x2.0 0.0]
;    v1 = vconst.f32x4 [0x3.4 0x6.7 0x8.9 0xa.b]
;    v2 = vconst.f32x4 [0xd.68 -0xd.47 0x11.2 0x0.0]
;
;    v3 = fmul v0, v1
;    v4 = fcmp eq v3, v2
;
;    v6 = fdiv v2, v1
;    v7 = fcmp eq v6, v0
;
;    v8 = band v4, v7
;    v9 = vall_true v8
;    return v9
;}
; _run
|
;function %sqrt_f64x2() -> b1 {
;block0:
;    v0 = vconst.f64x2 [0x9.0 0x1.0]
;    v1 = sqrt v0
;    v2 = vconst.f64x2 [0x3.0 0x1.0]
;    v3 = fcmp eq v2, v1
;    v4 = vall_true v3
;    return v4
;}
; _run
|
; Lane-wise 64-bit floating-point maximum.
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
    v2 = fmax v0, v1
    return v2
}
; note below how NaNs are quieted but (unlike fmin), retain their sign: this discrepancy is allowed by non-determinism
; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
|
|
; Lane-wise 64-bit floating-point minimum.
function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
    v2 = fmin v0, v1
    return v2
}
; note below how NaNs are quieted and negative: this is due to non-determinism in the spec for NaNs, see
; https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN]
; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0]
; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0]
|
;function %fneg_f64x2() -> b1 {
;block0:
;    v0 = vconst.f64x2 [0x1.0 -0x1.0]
;    v1 = fneg v0
;
;    v2 = vconst.f64x2 [-0x1.0 0x1.0]
;    v3 = fcmp eq v1, v2
;    v4 = vall_true v3
;
;    return v4
;}
; _run
|
;function %fneg_f32x4() -> b1 {
;block0:
;    v0 = vconst.f32x4 [0x0.0 -0x0.0 -Inf Inf]
;    v1 = fneg v0
;
;    v2 = vconst.f32x4 [-0x0.0 0x0.0 Inf -Inf]
;    v3 = fcmp eq v1, v2
;    v4 = vall_true v3
;
;    return v4
;}
; _run
|
;function %fabs_f32x4() -> b1 {
;block0:
;    v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
;    v1 = fabs v0
;
;    v2 = vconst.f32x4 [0x0.0 0x1.0 0x2.0 0x3.0]
;    v3 = fcmp eq v1, v2
;    v4 = vall_true v3
;
;    return v4
;}
; _run
|
;function %average_rounding_i16x8() -> b1 {
;block0:
;    v0 = vconst.i16x8 [0 0 0 1 42 19 -1 0xffff]
;    v1 = vconst.i16x8 [0 1 2 4 42 18 -1 0]
;    v2 = vconst.i16x8 [0 1 1 3 42 19 -1 0x8000]
;
;    v3 = avg_round v0, v1
;    v4 = icmp eq v2, v3
;    v5 = vall_true v4
;
;    return v5
;}
; _run
|
; Lane-wise 32-bit absolute value.
function %iabs(i32x4) -> i32x4 {
block0(v0: i32x4):
    v1 = iabs v0
    return v1
}
; run: %iabs([-42 -1 0 1]) == [42 1 0 1]