diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs index bf046f5ddd..aef8cca89a 100644 --- a/cranelift/codegen/meta/src/shared/legalize.rs +++ b/cranelift/codegen/meta/src/shared/legalize.rs @@ -115,6 +115,7 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro let uextend = insts.by_name("uextend"); let uload8 = insts.by_name("uload8"); let uload16 = insts.by_name("uload16"); + let umulhi = insts.by_name("umulhi"); let ushr = insts.by_name("ushr"); let ushr_imm = insts.by_name("ushr_imm"); let urem = insts.by_name("urem"); @@ -336,6 +337,25 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro } } + // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider having a + // separate legalization for x86. + for &ty in &[I64, I128] { + narrow.legalize( + def!(a = imul.ty(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(a1 = imul(xh, yl)), + def!(a2 = imul(xl, yh)), + def!(a3 = iadd(a1, a2)), + def!(a4 = umulhi(xl, yl)), + def!(ah = iadd(a3, a4)), + def!(al = imul(xl, yl)), + def!(a = iconcat(al, ah)), + ], + ); + } + // Widen instructions with one input operand. 
for &op in &[bnot, popcnt] { for &int_ty in &[I8, I16] { diff --git a/cranelift/filetests/filetests/isa/x86/imul-i128.clif b/cranelift/filetests/filetests/isa/x86/imul-i128.clif new file mode 100644 index 0000000000..2d683a32dd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/imul-i128.clif @@ -0,0 +1,20 @@ +test run +target x86_64 haswell + +function %test_imul_i128() -> b1 { +ebb0: + v11 = iconst.i64 0xf2347ac4503f1e24 + v12 = iconst.i64 0x0098fe985354ab06 + v1 = iconcat v11, v12 + v21 = iconst.i64 0xf606ba453589ef89 + v22 = iconst.i64 0x042e1f3054ca7432 + v2 = iconcat v21, v22 + v31 = iconst.i64 0xbe2044b2742ebd44 + v32 = iconst.i64 0xa363ce3b6849f307 + v3 = iconcat v31, v32 + v4 = imul v1, v2 + v5 = icmp eq v3, v4 + return v5 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif new file mode 100644 index 0000000000..db071ba3c7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif @@ -0,0 +1,20 @@ +; Test the legalization of i128 instructions on x86_64. 
+test legalizer +target x86_64 haswell + +; regex: V=v\d+ + +function %imul(i128, i128) -> i128 { +ebb0(v1: i128, v2: i128): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif index 2723bd69b8..a484818a34 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif @@ -26,6 +26,21 @@ ebb0(v1: i64, v2: i64): return v10 } +function %imul(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + function %icmp_eq(i64, i64) -> b1 { ebb0(v1: i64, v2: i64): v10 = icmp eq v1, v2