diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs new file mode 100644 index 0000000000..7a0cd8379c --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -0,0 +1,293 @@ +use crate::cdsl::ast::{bind, var, ExprBuilder, Literal}; +use crate::cdsl::inst::InstructionGroup; +use crate::cdsl::xform::TransformGroupBuilder; + +use crate::shared::types::Int::{I32, I64}; +use crate::shared::Definitions as SharedDefinitions; + +pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { + let mut group = TransformGroupBuilder::new( + "x86_expand", + r#" + Legalize instructions by expansion. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("expand_flags").id); + + // List of instructions. + let insts = &shared.instructions; + let band = insts.by_name("band"); + let bor = insts.by_name("bor"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint = insts.by_name("fcvt_to_sint"); + let fcvt_to_uint = insts.by_name("fcvt_to_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let iadd = insts.by_name("iadd"); + let iconst = insts.by_name("iconst"); + let imul = insts.by_name("imul"); + let isub = insts.by_name("isub"); + let popcnt = insts.by_name("popcnt"); + let sdiv = insts.by_name("sdiv"); + let selectif = insts.by_name("selectif"); + let smulhi = insts.by_name("smulhi"); + let srem = insts.by_name("srem"); + let udiv = insts.by_name("udiv"); + let umulhi = insts.by_name("umulhi"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + + let x86_bsf = x86_instructions.by_name("x86_bsf"); + let x86_bsr = x86_instructions.by_name("x86_bsr"); + let x86_umulx = x86_instructions.by_name("x86_umulx"); + let x86_smulx = x86_instructions.by_name("x86_smulx"); + + // List of immediates. + let floatcc = shared.operand_kinds.by_name("floatcc"); + let imm64 = shared.operand_kinds.by_name("imm64"); + let intcc = shared.operand_kinds.by_name("intcc"); + + // Division and remainder. + // + // The srem expansion requires custom code because srem INT_MIN, -1 is not + // allowed to trap. The other ops need to check avoid_div_traps. + group.custom_legalize(sdiv, "expand_sdivrem"); + group.custom_legalize(srem, "expand_sdivrem"); + group.custom_legalize(udiv, "expand_udivrem"); + group.custom_legalize(urem, "expand_udivrem"); + + // Double length (widening) multiplication. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let a1 = var("a1"); + let a2 = var("a2"); + let res_lo = var("res_lo"); + let res_hi = var("res_hi"); + + group.legalize( + def!(res_hi = umulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_umulx(x, y))], + ); + + group.legalize( + def!(res_hi = smulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_smulx(x, y))], + ); + + // Floating point condition codes. + // + // The 8 condition codes in `supported_floatccs` are directly supported by a + // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization + // patterns. + + let floatcc_eq = Literal::enumerator_for(floatcc, "eq"); + let floatcc_ord = Literal::enumerator_for(floatcc, "ord"); + let floatcc_ueq = Literal::enumerator_for(floatcc, "ueq"); + let floatcc_ne = Literal::enumerator_for(floatcc, "ne"); + let floatcc_uno = Literal::enumerator_for(floatcc, "uno"); + let floatcc_one = Literal::enumerator_for(floatcc, "one"); + + // Equality needs an explicit `ord` test which checks the parity bit. + group.legalize( + def!(a = fcmp(floatcc_eq, x, y)), + vec![ + def!(a1 = fcmp(floatcc_ord, x, y)), + def!(a2 = fcmp(floatcc_ueq, x, y)), + def!(a = band(a1, a2)), + ], + ); + group.legalize( + def!(a = fcmp(floatcc_ne, x, y)), + vec![ + def!(a1 = fcmp(floatcc_uno, x, y)), + def!(a2 = fcmp(floatcc_one, x, y)), + def!(a = bor(a1, a2)), + ], + ); + + let floatcc_lt = &Literal::enumerator_for(floatcc, "lt"); + let floatcc_gt = &Literal::enumerator_for(floatcc, "gt"); + let floatcc_le = &Literal::enumerator_for(floatcc, "le"); + let floatcc_ge = &Literal::enumerator_for(floatcc, "ge"); + let floatcc_ugt = &Literal::enumerator_for(floatcc, "ugt"); + let floatcc_ult = &Literal::enumerator_for(floatcc, "ult"); + let floatcc_uge = &Literal::enumerator_for(floatcc, "uge"); + let floatcc_ule = &Literal::enumerator_for(floatcc, "ule"); + + // Inequalities that need to be reversed. + for &(cc, rev_cc) in &[ + (floatcc_lt, floatcc_gt), + (floatcc_le, floatcc_ge), + (floatcc_ugt, floatcc_ult), + (floatcc_uge, floatcc_ule), + ] { + group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); + } + + // We need to modify the CFG for min/max legalization. + group.custom_legalize(fmin, "expand_minmax"); + group.custom_legalize(fmax, "expand_minmax"); + + // Conversions from unsigned need special handling. + group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); + // Conversions from float to int can trap and modify the control flow graph. + group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); + group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); + group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); + group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); + + // Count leading and trailing zeroes, for baseline x86_64 + let c_minus_one = var("c_minus_one"); + let c_thirty_one = var("c_thirty_one"); + let c_thirty_two = var("c_thirty_two"); + let c_sixty_three = var("c_sixty_three"); + let c_sixty_four = var("c_sixty_four"); + let index1 = var("index1"); + let r2flags = var("r2flags"); + let index2 = var("index2"); + + let intcc_eq = Literal::enumerator_for(intcc, "eq"); + let imm64_minus_one = Literal::constant(imm64, -1); + let imm64_63 = Literal::constant(imm64, 63); + group.legalize( + def!(a = clz.I64(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_sixty_three = iconst(imm64_63)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_sixty_three, index2)), + ], + ); + + let imm64_31 = Literal::constant(imm64, 31); + group.legalize( + def!(a = clz.I32(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_thirty_one = iconst(imm64_31)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_thirty_one, index2)), + ], + ); + + let imm64_64 = Literal::constant(imm64, 64); + group.legalize( + def!(a = ctz.I64(x)), + vec![ + def!(c_sixty_four = iconst(imm64_64)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), + ], + ); + + let imm64_32 = Literal::constant(imm64, 32); + group.legalize( + def!(a = ctz.I32(x)), + vec![ + def!(c_thirty_two = iconst(imm64_32)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), + ], + ); + + // Population count for baseline x86_64 + let qv1 = var("qv1"); + let qv3 = var("qv3"); + let qv4 = var("qv4"); + let qv5 = var("qv5"); + let qv6 = var("qv6"); + let qv7 = var("qv7"); + let qv8 = var("qv8"); + let qv9 = var("qv9"); + let qv10 = var("qv10"); + let qv11 = var("qv11"); + let qv12 = var("qv12"); + let qv13 = var("qv13"); + let qv14 = var("qv14"); + let qv15 = var("qv15"); + let qv16 = var("qv16"); + let qc77 = var("qc77"); + #[allow(non_snake_case)] + let qc0F = var("qc0F"); + let qc01 = var("qc01"); + + let imm64_1 = Literal::constant(imm64, 1); + let imm64_4 = Literal::constant(imm64, 4); + group.legalize( + def!(qv16 = popcnt.I64(qv1)), + vec![ + def!(qv3 = ushr_imm(qv1, imm64_1)), + def!(qc77 = iconst(Literal::constant(imm64, 0x7777777777777777))), + def!(qv4 = band(qv3, qc77)), + def!(qv5 = isub(qv1, qv4)), + def!(qv6 = ushr_imm(qv4, imm64_1)), + def!(qv7 = band(qv6, qc77)), + def!(qv8 = isub(qv5, qv7)), + def!(qv9 = ushr_imm(qv7, imm64_1)), + def!(qv10 = band(qv9, qc77)), + def!(qv11 = isub(qv8, qv10)), + def!(qv12 = ushr_imm(qv11, imm64_4)), + def!(qv13 = iadd(qv11, qv12)), + def!(qc0F = iconst(Literal::constant(imm64, 0x0F0F0F0F0F0F0F0F))), + def!(qv14 = band(qv13, qc0F)), + def!(qc01 = iconst(Literal::constant(imm64, 0x0101010101010101))), + def!(qv15 = imul(qv14, qc01)), + def!(qv16 = ushr_imm(qv15, Literal::constant(imm64, 56))), + ], + ); + + let lv1 = var("lv1"); + let lv3 = var("lv3"); + let lv4 = var("lv4"); + let lv5 = var("lv5"); + let lv6 = var("lv6"); + let lv7 = var("lv7"); + let lv8 = var("lv8"); + let lv9 = var("lv9"); + let lv10 = var("lv10"); + let lv11 = var("lv11"); + let lv12 = var("lv12"); + let lv13 = var("lv13"); + let lv14 = var("lv14"); + let lv15 = var("lv15"); + let lv16 = var("lv16"); + let lc77 = var("lc77"); + #[allow(non_snake_case)] + let lc0F = var("lc0F"); + let lc01 = var("lc01"); + + group.legalize( + def!(lv16 = popcnt.I32(lv1)), + vec![ + def!(lv3 = ushr_imm(lv1, imm64_1)), + def!(lc77 = iconst(Literal::constant(imm64, 0x77777777))), + def!(lv4 = band(lv3, lc77)), + def!(lv5 = isub(lv1, lv4)), + def!(lv6 = ushr_imm(lv4, imm64_1)), + def!(lv7 = band(lv6, lc77)), + def!(lv8 = isub(lv5, lv7)), + def!(lv9 = ushr_imm(lv7, imm64_1)), + def!(lv10 = band(lv9, lc77)), + def!(lv11 = isub(lv8, lv10)), + def!(lv12 = ushr_imm(lv11, imm64_4)), + def!(lv13 = iadd(lv11, lv12)), + def!(lc0F = iconst(Literal::constant(imm64, 0x0F0F0F0F))), + def!(lv14 = band(lv13, lc0F)), + def!(lc01 = iconst(Literal::constant(imm64, 0x01010101))), + def!(lv15 = imul(lv14, lc01)), + def!(lv16 = ushr_imm(lv15, Literal::constant(imm64, 24))), + ], + ); + + group.finish_and_add_to(&mut shared.transform_groups); +} diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs new file mode 100644 index 0000000000..172d6fd981 --- /dev/null +++ b/cranelift/codegen/meta/src/shared/legalize.rs @@ -0,0 +1,785 @@ +use crate::cdsl::ast::{bind, var, ExprBuilder, Literal}; +use crate::cdsl::inst::{Instruction, InstructionGroup}; +use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; + +use crate::shared::OperandKinds; + +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; + +pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformGroups { + let mut narrow = TransformGroupBuilder::new( + "narrow", + r#" + Legalize instructions by narrowing. + + The transformations in the 'narrow' group work by expressing + instructions in terms of smaller types. Operations on vector types are + expressed in terms of vector types with fewer lanes, and integer + operations are expressed in terms of smaller integer types. + "#, + ); + + let mut widen = TransformGroupBuilder::new( + "widen", + r#" + Legalize instructions by widening. + + The transformations in the 'widen' group work by expressing + instructions in terms of larger types. + "#, + ); + + let mut expand = TransformGroupBuilder::new( + "expand", + r#" + Legalize instructions by expansion. + + Rewrite instructions in terms of other instructions, generally + operating on the same types as the original instructions. + "#, + ); + + // List of instructions. + let band = insts.by_name("band"); + let band_imm = insts.by_name("band_imm"); + let band_not = insts.by_name("band_not"); + let bint = insts.by_name("bint"); + let bitrev = insts.by_name("bitrev"); + let bnot = insts.by_name("bnot"); + let bor = insts.by_name("bor"); + let bor_imm = insts.by_name("bor_imm"); + let bor_not = insts.by_name("bor_not"); + let br_icmp = insts.by_name("br_icmp"); + let br_table = insts.by_name("br_table"); + let bxor = insts.by_name("bxor"); + let bxor_imm = insts.by_name("bxor_imm"); + let bxor_not = insts.by_name("bxor_not"); + let cls = insts.by_name("cls"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let fabs = insts.by_name("fabs"); + let f32const = insts.by_name("f32const"); + let f64const = insts.by_name("f64const"); + let fcopysign = insts.by_name("fcopysign"); + let fneg = insts.by_name("fneg"); + let iadd = insts.by_name("iadd"); + let iadd_carry = insts.by_name("iadd_carry"); + let iadd_cin = insts.by_name("iadd_cin"); + let iadd_cout = insts.by_name("iadd_cout"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let icmp_imm = insts.by_name("icmp_imm"); + let iconcat = insts.by_name("iconcat"); + let iconst = insts.by_name("iconst"); + let ifcmp = insts.by_name("ifcmp"); + let ifcmp_imm = insts.by_name("ifcmp_imm"); + let imul = insts.by_name("imul"); + let imul_imm = insts.by_name("imul_imm"); + let ireduce = insts.by_name("ireduce"); + let irsub_imm = insts.by_name("irsub_imm"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let isplit = insts.by_name("isplit"); + let istore8 = insts.by_name("istore8"); + let istore16 = insts.by_name("istore16"); + let isub = insts.by_name("isub"); + let isub_bin = insts.by_name("isub_bin"); + let isub_borrow = insts.by_name("isub_borrow"); + let isub_bout = insts.by_name("isub_bout"); + let load = insts.by_name("load"); + let popcnt = insts.by_name("popcnt"); + let rotl = insts.by_name("rotl"); + let rotl_imm = insts.by_name("rotl_imm"); + let rotr = insts.by_name("rotr"); + let rotr_imm = insts.by_name("rotr_imm"); + let sdiv = insts.by_name("sdiv"); + let sdiv_imm = insts.by_name("sdiv_imm"); + let select = insts.by_name("select"); + let sextend = insts.by_name("sextend"); + let sshr = insts.by_name("sshr"); + let sshr_imm = insts.by_name("sshr_imm"); + let srem = insts.by_name("srem"); + let srem_imm = insts.by_name("srem_imm"); + let store = insts.by_name("store"); + let udiv = insts.by_name("udiv"); + let udiv_imm = insts.by_name("udiv_imm"); + let uextend = insts.by_name("uextend"); + let uload8 = insts.by_name("uload8"); + let uload16 = insts.by_name("uload16"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + let urem_imm = insts.by_name("urem_imm"); + let trapif = insts.by_name("trapif"); + let trapnz = insts.by_name("trapnz"); + let trapz = insts.by_name("trapz"); + + // Custom expansions for memory objects. + expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); + expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); + expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); + + // Custom expansions for calls. + expand.custom_legalize(insts.by_name("call"), "expand_call"); + + // Custom expansions that need to change the CFG. + // TODO: Add sufficient XForm syntax that we don't need to hand-code these. + expand.custom_legalize(trapz, "expand_cond_trap"); + expand.custom_legalize(trapnz, "expand_cond_trap"); + expand.custom_legalize(br_table, "expand_br_table"); + expand.custom_legalize(select, "expand_select"); + + // Custom expansions for floating point constants. + // These expansions require bit-casting or creating constant pool entries. + expand.custom_legalize(f32const, "expand_fconst"); + expand.custom_legalize(f64const, "expand_fconst"); + + // Custom expansions for stack memory accesses. + expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // List of immediates. + let imm64 = immediates.by_name("imm64"); + let ieee32 = immediates.by_name("ieee32"); + let ieee64 = immediates.by_name("ieee64"); + let intcc = immediates.by_name("intcc"); + + // List of variables to reuse in patterns. + let x = var("x"); + let y = var("y"); + let z = var("z"); + let a = var("a"); + let a1 = var("a1"); + let a2 = var("a2"); + let a3 = var("a3"); + let a4 = var("a4"); + let b = var("b"); + let b1 = var("b1"); + let b2 = var("b2"); + let b3 = var("b3"); + let b4 = var("b4"); + let b_in = var("b_in"); + let b_int = var("b_int"); + let c = var("c"); + let c1 = var("c1"); + let c2 = var("c2"); + let c3 = var("c3"); + let c4 = var("c4"); + let c_in = var("c_in"); + let c_int = var("c_int"); + let d = var("d"); + let d1 = var("d1"); + let d2 = var("d2"); + let d3 = var("d3"); + let d4 = var("d4"); + let e = var("e"); + let e1 = var("e1"); + let e2 = var("e2"); + let e3 = var("e3"); + let e4 = var("e4"); + let f = var("f"); + let f1 = var("f1"); + let f2 = var("f2"); + let xl = var("xl"); + let xh = var("xh"); + let yl = var("yl"); + let yh = var("yh"); + let al = var("al"); + let ah = var("ah"); + let cc = var("cc"); + let ptr = var("ptr"); + let flags = var("flags"); + let offset = var("off"); + + narrow.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_cout(xl, yl)), + def!(ah = iadd_cin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_bout(xl, yl)), + def!(ah = isub_bin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + for &bin_op in &[band, bor, bxor] { + narrow.legalize( + def!(a = bin_op(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = bin_op(xl, yl)), + def!(ah = bin_op(xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + } + + narrow.legalize( + def!(a = select(c, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = select(c, xl, yl)), + def!(ah = select(c, xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + + // Widen instructions with one input operand. + for &op in &[bnot, popcnt] { + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = op.int_ty(b)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + } + + // Widen instructions with two input operands. + let mut widen_two_arg = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(y = sign_ext_op.I32(c)), + def!(z = op.I32(x, y)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, + ] { + widen_two_arg(false, bin_op); + } + for bin_op in &[sdiv, srem] { + widen_two_arg(true, bin_op); + } + + // Widen instructions using immediate operands. + let mut widen_imm = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, + ] { + widen_imm(false, bin_op); + } + for bin_op in &[sdiv_imm, srem_imm] { + widen_imm(true, bin_op); + } + + for &(int_ty, num) in &[(I8, 24), (I16, 16)] { + let imm = Literal::constant(imm64, -num); + + widen.legalize( + def!(a = clz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + def!(d = clz.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + + widen.legalize( + def!(a = cls.int_ty(b)), + vec![ + def!(c = sextend.I32(b)), + def!(d = cls.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { + let num = Literal::constant(imm64, num); + widen.legalize( + def!(a = ctz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + // When `b` is zero, returns the size of x in bits. + def!(d = bor_imm(c, num)), + def!(e = ctz.I32(d)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + // iconst + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = iconst.int_ty(b)), + vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], + ); + } + + for &extend_op in &[uextend, sextend] { + // The sign extension operators have two typevars: the result has one and controls the + // instruction, then the input has one. + let bound = bind(bind(extend_op, I16), I8); + widen.legalize( + def!(a = bound(b)), + vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], + ); + } + + widen.legalize( + def!(store.I8(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore8(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(store.I16(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore16(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(a = load.I8(flags, ptr, offset)), + vec![ + def!(b = uload8.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + widen.legalize( + def!(a = load.I16(flags, ptr, offset)), + vec![ + def!(b = uload16.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(br_table.int_ty(x, y, z)), + vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], + ); + } + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = bint.int_ty(b)), + vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], + ); + } + + for &int_ty in &[I8, I16] { + for &op in &[ishl, ishl_imm, ushr, ushr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for &op in &[sshr, sshr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { + let w_cc = Literal::enumerator_for(intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(y = uextend.I32(c)), + def!(a = icmp.I32(w_cc, x, y)), + ], + ); + } + + for cc in &["sgt", "slt", "sge", "sle"] { + let w_cc = Literal::enumerator_for(intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(y = sextend.I32(c)), + def!(a = icmp(w_cc, x, y)), + ], + ); + } + } + + // Expand integer operations with carry for RISC architectures that don't have + // the flags. + let intcc_ult = Literal::enumerator_for(intcc, "ult"); + expand.legalize( + def!((a, c) = iadd_cout(x, y)), + vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], + ); + + let intcc_ugt = Literal::enumerator_for(intcc, "ugt"); + expand.legalize( + def!((a, b) = isub_bout(x, y)), + vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], + ); + + expand.legalize( + def!(a = iadd_cin(x, y, c)), + vec![ + def!(a1 = iadd(x, y)), + def!(c_int = bint(c)), + def!(a = iadd(a1, c_int)), + ], + ); + + expand.legalize( + def!(a = isub_bin(x, y, b)), + vec![ + def!(a1 = isub(x, y)), + def!(b_int = bint(b)), + def!(a = isub(a1, b_int)), + ], + ); + + expand.legalize( + def!((a, c) = iadd_carry(x, y, c_in)), + vec![ + def!((a1, c1) = iadd_cout(x, y)), + def!(c_int = bint(c_in)), + def!((a, c2) = iadd_cout(a1, c_int)), + def!(c = bor(c1, c2)), + ], + ); + + expand.legalize( + def!((a, b) = isub_borrow(x, y, b_in)), + vec![ + def!((a1, b1) = isub_bout(x, y)), + def!(b_int = bint(b_in)), + def!((a, b2) = isub_bout(a1, b_int)), + def!(b = bor(b1, b2)), + ], + ); + + // Expansions for immediate operands that are out of range. + for &(inst_imm, inst) in &[ + (iadd_imm, iadd), + (imul_imm, imul), + (sdiv_imm, sdiv), + (udiv_imm, udiv), + (srem_imm, srem), + (urem_imm, urem), + (band_imm, band), + (bor_imm, bor), + (bxor_imm, bxor), + (ifcmp_imm, ifcmp), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = irsub_imm(y, x)), + vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], + ); + + // Rotates and shifts. + for &(inst_imm, inst) in &[ + (rotl_imm, rotl), + (rotr_imm, rotr), + (ishl_imm, ishl), + (sshr_imm, sshr), + (ushr_imm, ushr), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = icmp_imm(cc, x, y)), + vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], + ); + + //# Expansions for *_not variants of bitwise ops. + for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { + expand.legalize( + def!(a = inst_not(x, y)), + vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], + ); + } + + //# Expand bnot using xor. + let minus_one = Literal::constant(imm64, -1); + expand.legalize( + def!(a = bnot(x)), + vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], + ); + + //# Expand bitrev + //# Adapted from Stack Overflow. + //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c + let imm64_1 = Literal::constant(imm64, 1); + let imm64_2 = Literal::constant(imm64, 2); + let imm64_4 = Literal::constant(imm64, 4); + + widen.legalize( + def!(a = bitrev.I8(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(imm64, 0xaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(imm64, 0x55))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(imm64, 0xcc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(imm64, 0x33))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(a = bor(c2, c4)), + ], + ); + + let imm64_8 = Literal::constant(imm64, 8); + + widen.legalize( + def!(a = bitrev.I16(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(imm64, 0xaaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(imm64, 0x5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(imm64, 0xcccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(imm64, 0x3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(imm64, 0xff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(imm64, 0x00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(a = bor(d2, d4)), + ], + ); + + let imm64_16 = Literal::constant(imm64, 16); + + expand.legalize( + def!(a = bitrev.I32(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(imm64, 0xaaaaaaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(imm64, 0x55555555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(imm64, 0xcccccccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(imm64, 0x33333333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0f0f0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f0f0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(imm64, 0xff00ff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(imm64, 0x00ff00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = ushr_imm(e, imm64_16)), + def!(e2 = ishl_imm(e, imm64_16)), + def!(a = bor(e1, e2)), + ], + ); + + #[allow(overflowing_literals)] + let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(imm64, 0xaaaaaaaaaaaaaaaa); + let imm64_0x5555555555555555 = Literal::constant(imm64, 0x5555555555555555); + #[allow(overflowing_literals)] + let imm64_0xcccccccccccccccc = Literal::constant(imm64, 0xcccccccccccccccc); + let imm64_0x3333333333333333 = Literal::constant(imm64, 0x3333333333333333); + #[allow(overflowing_literals)] + let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(imm64, 0xf0f0f0f0f0f0f0f0); + let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(imm64, 0x0f0f0f0f0f0f0f0f); + #[allow(overflowing_literals)] + let imm64_0xff00ff00ff00ff00 = Literal::constant(imm64, 0xff00ff00ff00ff00); + let imm64_0x00ff00ff00ff00ff = Literal::constant(imm64, 0x00ff00ff00ff00ff); + #[allow(overflowing_literals)] + let imm64_0xffff0000ffff0000 = Literal::constant(imm64, 0xffff0000ffff0000); + let imm64_0x0000ffff0000ffff = Literal::constant(imm64, 0x0000ffff0000ffff); + let imm64_32 = Literal::constant(imm64, 32); + + expand.legalize( + def!(a = bitrev.I64(x)), + vec![ + def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, imm64_0x5555555555555555)), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, imm64_0x3333333333333333)), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), + def!(e2 = ushr_imm(e1, imm64_16)), + def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), + def!(e4 = ishl_imm(e3, imm64_16)), + def!(f = bor(e2, e4)), + def!(f1 = ushr_imm(f, imm64_32)), + def!(f2 = ishl_imm(f, imm64_32)), + def!(a = bor(f1, f2)), + ], + ); + + // Floating-point sign manipulations. + for &(ty, const_inst, minus_zero) in &[ + (F32, f32const, &Literal::bits(ieee32, 0x80000000)), + (F64, f64const, &Literal::bits(ieee64, 0x8000000000000000)), + ] { + expand.legalize( + def!(a = fabs.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], + ); + + expand.legalize( + def!(a = fneg.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], + ); + + expand.legalize( + def!(a = fcopysign.ty(x, y)), + vec![ + def!(b = const_inst(minus_zero)), + def!(a1 = band_not(x, b)), + def!(a2 = band(y, b)), + def!(a = bor(a1, a2)), + ], + ); + } + + expand.custom_legalize(br_icmp, "expand_br_icmp"); + + let mut groups = TransformGroups::new(); + + narrow.finish_and_add_to(&mut groups); + let expand_id = expand.finish_and_add_to(&mut groups); + + // Expansions using CPU flags. + let mut expand_flags = TransformGroupBuilder::new( + "expand_flags", + r#" + Instruction expansions for architectures with flags. + + Expand some instructions using CPU flags, then fall back to the normal + expansions. Not all architectures support CPU flags, so these patterns + are kept separate. + "#, + ) + .chain_with(expand_id); + + let imm64_0 = Literal::constant(imm64, 0); + let intcc_ne = Literal::enumerator_for(intcc, "ne"); + let intcc_eq = Literal::enumerator_for(intcc, "eq"); + + expand_flags.legalize( + def!(trapnz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_ne, a, c)), + ], + ); + + expand_flags.legalize( + def!(trapz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_eq, a, c)), + ], + ); + + expand_flags.finish_and_add_to(&mut groups); + + // XXX The order of declarations unfortunately matters to be compatible with the Python code. + // When it's all migrated, we can put this next to the narrow/expand finish_and_add_to calls + // above. + widen.finish_and_add_to(&mut groups); + + groups +} diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs index 0e6a11f0e1..5237d9ba33 100644 --- a/cranelift/codegen/meta/src/shared/mod.rs +++ b/cranelift/codegen/meta/src/shared/mod.rs @@ -4,6 +4,7 @@ pub mod entities; pub mod formats; pub mod immediates; pub mod instructions; +pub mod legalize; pub mod settings; pub mod types; @@ -11,12 +12,14 @@ use crate::cdsl::formats::FormatRegistry; use crate::cdsl::inst::InstructionGroup; use crate::cdsl::operands::OperandKind; use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::TransformGroups; pub struct Definitions { pub settings: SettingGroup, pub instructions: InstructionGroup, pub operand_kinds: OperandKinds, pub format_registry: FormatRegistry, + pub transform_groups: TransformGroups, } pub struct OperandKinds(Vec); @@ -50,10 +53,14 @@ pub fn define() -> Definitions { let immediates = OperandKinds(immediates::define()); let entities = OperandKinds(entities::define()); let format_registry = formats::define(&immediates, &entities); + let instructions = instructions::define(&format_registry, &immediates, &entities); + let transform_groups = legalize::define(&instructions, &immediates); + Definitions { settings: settings::define(), - instructions: instructions::define(&format_registry, &immediates, &entities), + instructions, operand_kinds: immediates, format_registry, + transform_groups, } }