diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 516cbd7515..d97656f765 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -424,6 +424,35 @@ pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
     ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
 }
 
+/// Like `put_input_in_rse_imm12` above, except is allowed to negate the
+/// argument (assuming a two's-complement representation with the given bit
+/// width) if this allows use of a 12-bit immediate. Used to flip `add`s with
+/// negative immediates to `sub`s (and vice-versa).
+pub(crate) fn put_input_in_rse_imm12_maybe_negated<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    input: InsnInput,
+    twos_complement_bits: usize,
+    narrow_mode: NarrowValueMode,
+) -> (ResultRSEImm12, bool) {
+    assert!(twos_complement_bits <= 64);
+    if let Some(imm_value) = input_to_const(ctx, input) {
+        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
+            return (ResultRSEImm12::Imm12(i), false);
+        }
+        let sign_extended =
+            ((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
+        let inverted = sign_extended.wrapping_neg();
+        if let Some(i) = Imm12::maybe_from_u64(inverted as u64) {
+            return (ResultRSEImm12::Imm12(i), true);
+        }
+    }
+
+    (
+        ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode)),
+        false,
+    )
+}
+
 pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     input: InsnInput,
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 5db776765d..5fa0ebdc66 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -60,8 +60,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
             if ty_bits(ty) < 128 {
-                let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
-                let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
+                let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+                    ctx,
+                    inputs[1],
+                    ty_bits(ty),
+                    NarrowValueMode::None,
+                );
+                let alu_op = if !negated {
+                    choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                } else {
+                    choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+                };
                 ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
             } else {
                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
@@ -79,8 +88,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
             if ty_bits(ty) < 128 {
-                let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
-                let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
+                let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+                    ctx,
+                    inputs[1],
+                    ty_bits(ty),
+                    NarrowValueMode::None,
+                );
+                let alu_op = if !negated {
+                    choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+                } else {
+                    choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                };
                 ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
             } else {
                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
diff --git a/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
index 0a33eeaa47..aa52cb7436 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
@@ -380,3 +380,45 @@ block0(v0: i32, v1: i32):
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f26(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -1
+    v2 = iadd.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sub w0, w0, #1
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f27(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -1
+    v2 = isub.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: add w0, w0, #1
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f28(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -1
+    v2 = isub.i64 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: add x0, x0, #1
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
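
Note (not part of the patch): below is a minimal, standalone Rust sketch of the negate-and-retry check that `put_input_in_rse_imm12_maybe_negated` performs, shown in isolation. The names `fits_imm12` and `negate_fits_in_imm12` are hypothetical stand-ins for `Imm12::maybe_from_u64` and the helper above, assuming the usual AArch64 add/sub immediate form (a 12-bit value, optionally shifted left by 12).

// Illustrative sketch only; `fits_imm12` is a hypothetical stand-in for
// `Imm12::maybe_from_u64` (12-bit value, optionally shifted left by 12).
fn fits_imm12(v: u64) -> bool {
    v < 0x1000 || (v & !0xfff_000) == 0
}

// Mirrors the patch's logic: if `imm_value` (a constant of width `bits`)
// does not fit as an immediate, sign-extend it to 64 bits, negate it in
// two's complement, and retry. Returns the encodable value plus a flag
// telling the caller to swap add <-> sub.
fn negate_fits_in_imm12(imm_value: u64, bits: usize) -> Option<(u64, bool)> {
    assert!(bits > 0 && bits <= 64);
    if fits_imm12(imm_value) {
        return Some((imm_value, false));
    }
    let sign_extended = ((imm_value as i64) << (64 - bits)) >> (64 - bits);
    let negated = sign_extended.wrapping_neg() as u64;
    if fits_imm12(negated) {
        return Some((negated, true));
    }
    None
}

fn main() {
    // -1_i32 seen as a 32-bit constant: negating gives +1, so the caller
    // can emit `sub w0, w0, #1` for `iadd v0, -1` (as in %f26 above).
    assert_eq!(negate_fits_in_imm12(0xffff_ffff, 32), Some((1, true)));
}

The same flip, in the other direction, is what the `%f27`/`%f28` expectations check: `isub v0, -1` is emitted as `add` with immediate #1.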