AArch64 backend: add a register + extended-register addressing mode.

This patch introduces MemArg::RegExtended(base, index, extend_op): a base
register plus an unscaled index register that is sign- or zero-extended
first. It wires the new variant into pretty-printing, load/store emission,
register-use collection and register mapping, and makes lower_address pick
it when exactly two addends with a zero offset are present and one of them
is a uextend/sextend from a 32-bit value. Extends from other widths fall
through to the existing general two-register case.

Tests: two new binemit cases (ldr/str with SXTW/UXTW and no shift) and a
new compile filetest covering both operand orders for uextend and sextend.

diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index dd41912479..2b416145bc 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -133,6 +133,9 @@ pub enum MemArg {
     /// first.
     RegScaledExtended(Reg, Reg, Type, ExtendOp),
 
+    /// Register plus register offset, with index sign- or zero-extended first.
+    RegExtended(Reg, Reg, ExtendOp),
+
     /// Unscaled signed 9-bit immediate offset from reg.
     Unscaled(Reg, SImm9),
 
@@ -412,6 +415,19 @@ impl ShowWithRRU for MemArg {
                     shift
                 )
             }
+            &MemArg::RegExtended(r1, r2, op) => {
+                let size = match op {
+                    ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
+                    _ => InstSize::Size64,
+                };
+                let op = op.show_rru(mb_rru);
+                format!(
+                    "[{}, {}, {}]",
+                    r1.show_rru(mb_rru),
+                    show_ireg_sized(r2, mb_rru, size),
+                    op,
+                )
+            }
             &MemArg::Label(ref label) => label.show_rru(mb_rru),
             &MemArg::PreIndexed(r, simm9) => format!(
                 "[{}, {}]!",
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index d4bf3055ed..263241835f 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -707,6 +707,16 @@ impl MachInstEmit for Inst {
                             op, r1, r2, /* scaled = */ true, extendop, rd,
                         ));
                     }
+                    &MemArg::RegExtended(r1, r2, extendop) => {
+                        sink.put4(enc_ldst_reg(
+                            op,
+                            r1,
+                            r2,
+                            /* scaled = */ false,
+                            Some(extendop),
+                            rd,
+                        ));
+                    }
                     &MemArg::Label(ref label) => {
                         let offset = match label {
                             // cast i32 to u32 (two's-complement)
@@ -833,6 +843,16 @@ impl MachInstEmit for Inst {
                             op, r1, r2, /* scaled = */ true, extendop, rd,
                         ));
                     }
+                    &MemArg::RegExtended(r1, r2, extendop) => {
+                        sink.put4(enc_ldst_reg(
+                            op,
+                            r1,
+                            r2,
+                            /* scaled = */ false,
+                            Some(extendop),
+                            rd,
+                        ));
+                    }
                     &MemArg::Label(..) => {
                         panic!("Store to a MemLabel not implemented!");
                     }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 0cb54ceced..7b2c095035 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -1281,6 +1281,15 @@ fn test_aarch64_binemit() {
         "41D863F8",
         "ldr x1, [x2, w3, SXTW #3]",
     ));
+    insns.push((
+        Inst::ULoad64 {
+            rd: writable_xreg(1),
+            mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW),
+            srcloc: None,
+        },
+        "41C863F8",
+        "ldr x1, [x2, w3, SXTW]",
+    ));
     insns.push((
         Inst::ULoad64 {
             rd: writable_xreg(1),
@@ -1474,6 +1483,15 @@ fn test_aarch64_binemit() {
         "415823F8",
         "str x1, [x2, w3, UXTW #3]",
     ));
+    insns.push((
+        Inst::Store64 {
+            rd: xreg(1),
+            mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW),
+            srcloc: None,
+        },
+        "414823F8",
+        "str x1, [x2, w3, UXTW]",
+    ));
     insns.push((
         Inst::Store64 {
             rd: xreg(1),
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 831481814c..6c5eb4d995 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1049,7 +1049,8 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
         }
         &MemArg::RegReg(r1, r2, ..)
         | &MemArg::RegScaled(r1, r2, ..)
-        | &MemArg::RegScaledExtended(r1, r2, ..) => {
+        | &MemArg::RegScaledExtended(r1, r2, ..)
+        | &MemArg::RegExtended(r1, r2, ..) => {
             collector.add_use(r1);
             collector.add_use(r2);
         }
@@ -1384,15 +1385,10 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
         match mem {
             &mut MemArg::Unscaled(ref mut reg, ..) => map_use(m, reg),
             &mut MemArg::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
-            &mut MemArg::RegReg(ref mut r1, ref mut r2) => {
-                map_use(m, r1);
-                map_use(m, r2);
-            }
-            &mut MemArg::RegScaled(ref mut r1, ref mut r2, ..) => {
-                map_use(m, r1);
-                map_use(m, r2);
-            }
-            &mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..) => {
+            &mut MemArg::RegReg(ref mut r1, ref mut r2)
+            | &mut MemArg::RegScaled(ref mut r1, ref mut r2, ..)
+            | &mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..)
+            | &mut MemArg::RegExtended(ref mut r1, ref mut r2, ..) => {
                 map_use(m, r1);
                 map_use(m, r2);
             }
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 5cb4940b66..d1526c2ae9 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -550,7 +550,51 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
         return MemArg::RegOffset(reg, offset as i64, elem_ty);
     }
 
-    // Handle two regs and a zero offset, if possible.
+    // Handle two regs and a zero offset with built-in extend, if possible.
+    if addends.len() == 2 && offset == 0 {
+        // r1, r2 (to be extended), r2_bits, is_signed
+        let mut parts: Option<(Reg, Reg, usize, bool)> = None;
+        // Handle extension of either first or second addend.
+        for i in 0..2 {
+            if let Some((op, ext_insn)) =
+                maybe_input_insn_multi(ctx, addends[i], &[Opcode::Uextend, Opcode::Sextend])
+            {
+                // Non-extended addend.
+                let r1 = input_to_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
+                // Extended addend.
+                let r2 = input_to_reg(
+                    ctx,
+                    InsnInput {
+                        insn: ext_insn,
+                        input: 0,
+                    },
+                    NarrowValueMode::None,
+                );
+                let r2_bits = ty_bits(ctx.input_ty(ext_insn, 0));
+                parts = Some((
+                    r1,
+                    r2,
+                    r2_bits,
+                    /* is_signed = */ op == Opcode::Sextend,
+                ));
+                break;
+            }
+        }
+
+        if let Some((r1, r2, r2_bits, is_signed)) = parts {
+            match (r2_bits, is_signed) {
+                (32, false) => {
+                    return MemArg::RegExtended(r1, r2, ExtendOp::UXTW);
+                }
+                (32, true) => {
+                    return MemArg::RegExtended(r1, r2, ExtendOp::SXTW);
+                }
+                _ => {}
+            }
+        }
+    }
+
+    // Handle two regs and a zero offset in the general case, if possible.
     if addends.len() == 2 && offset == 0 {
         let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
         let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
@@ -812,6 +856,20 @@ pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
     None
 }
 
+/// Checks for an instance of any one of `ops` feeding the given input.
+pub(crate) fn maybe_input_insn_multi<C: LowerCtx<I = Inst>>(
+    c: &mut C,
+    input: InsnInput,
+    ops: &[Opcode],
+) -> Option<(Opcode, IRInst)> {
+    for &op in ops {
+        if let Some(inst) = maybe_input_insn(c, input, op) {
+            return Some((op, inst));
+        }
+    }
+    None
+}
+
 /// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
 /// Bint or a bitcast).
 ///
diff --git a/cranelift/filetests/filetests/vcode/aarch64/amodes.clif b/cranelift/filetests/filetests/vcode/aarch64/amodes.clif
new file mode 100644
index 0000000000..96855d00b6
--- /dev/null
+++ b/cranelift/filetests/filetests/vcode/aarch64/amodes.clif
@@ -0,0 +1,58 @@
+test compile
+target aarch64
+
+function %f0(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = uextend.i64 v1
+  v3 = load_complex.i32 v0+v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, UXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f1(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = uextend.i64 v1
+  v3 = load_complex.i32 v2+v0
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, UXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f1(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = sextend.i64 v1
+  v3 = load_complex.i32 v0+v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f1(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = sextend.i64 v1
+  v3 = load_complex.i32 v2+v0
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret