From a2e74b2c4564d4d156fcbe3715e022025495e7c7 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 22 May 2021 21:22:05 +0100 Subject: [PATCH] aarch64: Implement isub for i128 operands --- .../codegen/src/isa/aarch64/inst/emit.rs | 2 + .../src/isa/aarch64/inst/emit_tests.rs | 21 ++++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 5 ++ .../codegen/src/isa/aarch64/lower_inst.rs | 74 ++++++++++++------- .../filetests/isa/aarch64/arithmetic-run.clif | 21 +++++- .../filetests/isa/aarch64/arithmetic.clif | 13 ++++ 6 files changed, 109 insertions(+), 27 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 89c4f88b4b..60fedcd0d3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -601,6 +601,8 @@ impl MachInstEmit for Inst { ALUOp::Adc64 => 0b10011010_000, ALUOp::Sub32 => 0b01001011_000, ALUOp::Sub64 => 0b11001011_000, + ALUOp::Sbc32 => 0b01011010_000, + ALUOp::Sbc64 => 0b11011010_000, ALUOp::Orr32 => 0b00101010_000, ALUOp::Orr64 => 0b10101010_000, ALUOp::And32 => 0b00001010_000, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e568829955..530269b201 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -90,6 +90,27 @@ fn test_aarch64_binemit() { "A40006CB", "sub x4, x5, x6", )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sbc32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100035A", + "sbc w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sbc64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006DA", + "sbc x4, x5, x6", + )); + insns.push(( Inst::AluRRR { alu_op: ALUOp::Orr32, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 15073a8247..ecdf43c6ff 100644 --- 
a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -87,6 +87,9 @@ pub enum ALUOp { /// Add with carry Adc32, Adc64, + /// Subtract with carry + Sbc32, + Sbc64, } /// An ALU operation with three arguments. @@ -3209,6 +3212,8 @@ impl Inst { ALUOp::Lsl64 => ("lsl", OperandSize::Size64), ALUOp::Adc32 => ("adc", OperandSize::Size32), ALUOp::Adc64 => ("adc", OperandSize::Size64), + ALUOp::Sbc32 => ("sbc", OperandSize::Size32), + ALUOp::Sbc64 => ("sbc", OperandSize::Size64), } } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index d6b8933fc4..2a3ee88fcc 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -85,10 +85,9 @@ pub(crate) fn lower_insn_to_regs>( assert_eq!(rhs.len(), 2); assert_eq!(dst.len(), 2); - // adds x0, x0, x1 + // adds x0, x0, x2 // adc x1, x1, x3 - // Add lower ctx.emit(Inst::AluRRR { alu_op: ALUOp::AddS64, rd: dst.regs()[0], @@ -149,31 +148,56 @@ pub(crate) fn lower_insn_to_regs>( } } Opcode::Isub => { - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); - if !ty.is_vector() { - let (rm, negated) = put_input_in_rse_imm12_maybe_negated( - ctx, - inputs[1], - ty_bits(ty), - NarrowValueMode::None, - ); - let alu_op = if !negated { - choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64) - } else { - choose_32_64(ty, ALUOp::Add32, ALUOp::Add64) - }; - ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); - } else { - let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - ctx.emit(Inst::VecRRR { - rd, - rn, - rm, - alu_op: VecALUOp::Sub, - size: VectorSize::from_ty(ty), + if ty == I128 { + let lhs = put_input_in_regs(ctx, inputs[0]); + let rhs = put_input_in_regs(ctx, inputs[1]); + let dst = get_output_reg(ctx, outputs[0]); + assert_eq!(lhs.len(), 2); + assert_eq!(rhs.len(), 2); + 
assert_eq!(dst.len(), 2); + + // subs x0, x0, x2 + // sbc x1, x1, x3 + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: dst.regs()[0], + rn: lhs.regs()[0], + rm: rhs.regs()[0], }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Sbc64, + rd: dst.regs()[1], + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + } else { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if !ty.is_vector() { + let (rm, negated) = put_input_in_rse_imm12_maybe_negated( + ctx, + inputs[1], + ty_bits(ty), + NarrowValueMode::None, + ); + let alu_op = if !negated { + choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64) + } else { + choose_32_64(ty, ALUOp::Add32, ALUOp::Add64) + }; + ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::VecRRR { + rd, + rn, + rm, + alu_op: VecALUOp::Sub, + size: VectorSize::from_ty(ty), + }); + } } } Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => { diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif index 01bfdb115a..90fdd43e41 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif @@ -2,7 +2,7 @@ test run target aarch64 ; i128 tests -; TODO: It would be nice if we had native support for i128 immediates in CLIF's parser +; TODO: Clean up these tests when we have native support for i128 immediates in CLIF's parser function %i128_const_0() -> i64, i64 { block0: v1 = iconst.i128 0 @@ -36,4 +36,21 @@ block0(v0: i64,v1: i64,v2: i64,v3: i64): ; run: %add_i128(1, 0, 0, 0) == [1, 0] ; run: %add_i128(1, 0, 1, 0) == [2, 0] ; run: %add_i128(1, 0, -1, -1) == [0, 0] -; run: %add_i128(-1, 0, 1, 0) == [0, 1] \ No newline at end of file +; run: %add_i128(-1, 0, 1, 0) == [0, 1] + + +function %sub_i128(i64, i64, i64, 
i64) -> i64, i64 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + v6 = isub v4, v5 + + v7, v8 = isplit v6 + return v7, v8 +} +; run: %sub_i128(0, 0, 0, 0) == [0, 0] +; run: %sub_i128(1, 0, 1, 0) == [0, 0] +; run: %sub_i128(1, 0, 0, 0) == [1, 0] +; run: %sub_i128(0, 0, 1, 0) == [-1, -1] +; run: %sub_i128(0, 0, -1, -1) == [1, 0] \ No newline at end of file diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif index 91d4987d89..edd25c0023 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif @@ -440,3 +440,16 @@ block0(v0: i128, v1: i128): ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret +function %sub_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = isub v0, v1 + return v2 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs x0, x0, x2 +; nextln: sbc x1, x1, x3 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret +