From d3b525fa29ae55da9b78111e1c87af93dfd3427f Mon Sep 17 00:00:00 2001
From: Afonso Bordado
Date: Fri, 14 May 2021 18:12:13 +0100
Subject: [PATCH] aarch64: Implement iadd for i128 operands

---
 .../codegen/src/isa/aarch64/inst/emit.rs      |   2 +
 .../src/isa/aarch64/inst/emit_tests.rs        |  20 +++
 cranelift/codegen/src/isa/aarch64/inst/mod.rs |  55 +++++----
 .../codegen/src/isa/aarch64/lower_inst.rs     | 115 +++++++++++-------
 .../filetests/isa/aarch64/arithmetic-run.clif |  39 ++++++
 .../filetests/isa/aarch64/arithmetic.clif     |  15 +++
 6 files changed, 179 insertions(+), 67 deletions(-)
 create mode 100644 cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 6621e3f409..89c4f88b4b 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -597,6 +597,8 @@ impl MachInstEmit for Inst {
                 let top11 = match alu_op {
                     ALUOp::Add32 => 0b00001011_000,
                     ALUOp::Add64 => 0b10001011_000,
+                    ALUOp::Adc32 => 0b00011010_000,
+                    ALUOp::Adc64 => 0b10011010_000,
                     ALUOp::Sub32 => 0b01001011_000,
                     ALUOp::Sub64 => 0b11001011_000,
                     ALUOp::Orr32 => 0b00101010_000,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 9f628fced6..e568829955 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -50,6 +50,26 @@ fn test_aarch64_binemit() {
         "A400068B",
         "add x4, x5, x6",
     ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::Adc32,
+            rd: writable_xreg(1),
+            rn: xreg(2),
+            rm: xreg(3),
+        },
+        "4100031A",
+        "adc w1, w2, w3",
+    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::Adc64,
+            rd: writable_xreg(4),
+            rn: xreg(5),
+            rm: xreg(6),
+        },
+        "A400069A",
+        "adc x4, x5, x6",
+    ));
     insns.push((
         Inst::AluRRR {
             alu_op: ALUOp::Sub32,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 35903c18d0..15073a8247 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -84,6 +84,9 @@ pub enum ALUOp {
     Asr64,
     Lsl32,
     Lsl64,
+    /// Add with carry
+    Adc32,
+    Adc64,
 }
 
 /// An ALU operation with three arguments.
@@ -1365,6 +1368,23 @@ impl Inst {
         }
     }
 
+    /// Create instructions that load a 128-bit constant.
+    pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
+        assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
+
+        let lower = value as u64;
+        let upper = (value >> 64) as u64;
+
+        let lower_reg = to_regs.regs()[0];
+        let upper_reg = to_regs.regs()[1];
+
+        let mut load_ins = Inst::load_constant(lower_reg, lower);
+        let load_upper = Inst::load_constant(upper_reg, upper);
+
+        load_ins.extend(load_upper.into_iter());
+        load_ins
+    }
+
     /// Create instructions that load a 32-bit floating-point constant.
     pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
         rd: Writable<Reg>,
@@ -3033,30 +3053,15 @@ impl MachInst for Inst {
         ty: Type,
         alloc_tmp: F,
     ) -> SmallVec<[Inst; 4]> {
-        let to_reg = to_regs
-            .only_reg()
-            .expect("multi-reg values not supported yet");
-        let value = value as u64;
-        if ty == F64 {
-            Inst::load_fp_constant64(to_reg, value, alloc_tmp)
-        } else if ty == F32 {
-            Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
-        } else {
-            // Must be an integer type.
-            debug_assert!(
-                ty == B1
-                    || ty == I8
-                    || ty == B8
-                    || ty == I16
-                    || ty == B16
-                    || ty == I32
-                    || ty == B32
-                    || ty == I64
-                    || ty == B64
-                    || ty == R32
-                    || ty == R64
-            );
-            Inst::load_constant(to_reg, value)
+        let to_reg = to_regs.only_reg();
+        match ty {
+            F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
+            F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
+            B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
+                Inst::load_constant(to_reg.unwrap(), value as u64)
+            }
+            I128 => Inst::load_constant128(to_regs, value),
+            _ => panic!("Cannot generate constant for type: {}", ty),
         }
     }
 
@@ -3202,6 +3207,8 @@ impl Inst {
             ALUOp::Asr64 => ("asr", OperandSize::Size64),
             ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
             ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
+            ALUOp::Adc32 => ("adc", OperandSize::Size32),
+            ALUOp::Adc64 => ("adc", OperandSize::Size64),
         }
     }
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index fe56d1358d..d6b8933fc4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -64,59 +64,88 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             lower_constant_f64(ctx, rd, value);
         }
         Opcode::Iadd => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let ty = ty.unwrap();
-            if !ty.is_vector() {
-                let mul_insn =
-                    if let Some(mul_insn) = maybe_input_insn(ctx, inputs[1], Opcode::Imul) {
+            match ty.unwrap() {
+                ty if ty.is_vector() => {
+                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    ctx.emit(Inst::VecRRR {
+                        rd,
+                        rn,
+                        rm,
+                        alu_op: VecALUOp::Add,
+                        size: VectorSize::from_ty(ty),
+                    });
+                }
+                I128 => {
+                    let lhs = put_input_in_regs(ctx, inputs[0]);
+                    let rhs = put_input_in_regs(ctx, inputs[1]);
+                    let dst = get_output_reg(ctx, outputs[0]);
+                    assert_eq!(lhs.len(), 2);
+                    assert_eq!(rhs.len(), 2);
+                    assert_eq!(dst.len(), 2);
+
+                    // adds x0, x0, x2
+                    // adc x1, x1, x3
+
+                    // Add lower
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::AddS64,
+                        rd: dst.regs()[0],
+                        rn: lhs.regs()[0],
+                        rm: rhs.regs()[0],
+                    });
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::Adc64,
+                        rd: dst.regs()[1],
+                        rn: lhs.regs()[1],
+                        rm: rhs.regs()[1],
+                    });
+                }
+                ty => {
+                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                    let mul_insn = if let Some(mul_insn) =
+                        maybe_input_insn(ctx, inputs[1], Opcode::Imul)
+                    {
                         Some((mul_insn, 0))
                     } else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
                         Some((mul_insn, 1))
                     } else {
                         None
                     };
-                // If possible combine mul + add into madd.
-                if let Some((insn, addend_idx)) = mul_insn {
-                    let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
-                    let rn_input = InsnInput { insn, input: 0 };
-                    let rm_input = InsnInput { insn, input: 1 };
-
-                    let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
-                    let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
-                    let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
+                    // If possible combine mul + add into madd.
+                    if let Some((insn, addend_idx)) = mul_insn {
+                        let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
+                        let rn_input = InsnInput { insn, input: 0 };
+                        let rm_input = InsnInput { insn, input: 1 };
+
-
-                    ctx.emit(Inst::AluRRRR {
-                        alu_op,
-                        rd,
-                        rn,
-                        rm,
-                        ra,
-                    });
-                } else {
-                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                    let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
-                        ctx,
-                        inputs[1],
-                        ty_bits(ty),
-                        NarrowValueMode::None,
-                    );
-                    let alu_op = if !negated {
-                        choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                        let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
+                        let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
+                        let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
+
+                        ctx.emit(Inst::AluRRRR {
+                            alu_op,
+                            rd,
+                            rn,
+                            rm,
+                            ra,
+                        });
                     } else {
-                        choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
-                    };
-                    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+                        let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                        let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+                            ctx,
+                            inputs[1],
+                            ty_bits(ty),
+                            NarrowValueMode::None,
+                        );
+                        let alu_op = if !negated {
+                            choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                        } else {
+                            choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+                        };
+                        ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+                    }
                 }
-            } else {
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                ctx.emit(Inst::VecRRR {
-                    rd,
-                    rn,
-                    rm,
-                    alu_op: VecALUOp::Add,
-                    size: VectorSize::from_ty(ty),
-                });
             }
         }
         Opcode::Isub => {
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif
new file mode 100644
index 0000000000..01bfdb115a
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif
@@ -0,0 +1,39 @@
+test run
+target aarch64
+
+; i128 tests
+; TODO: It would be nice if we had native support for i128 immediates in CLIF's parser
+function %i128_const_0() -> i64, i64 {
+block0:
+    v1 = iconst.i128 0
+    v2, v3 = isplit v1
+    return v2, v3
+}
+; run: %i128_const_0() == [0, 0]
+
+; TODO: Blocked by https://github.com/bytecodealliance/wasmtime/issues/2906
+;function %i128_const_neg_1() -> i64, i64 {
+;block0:
+;    v1 = iconst.i128 -1
+;    v2, v3 = isplit v1
+;    return v2, v3
+;}
+; r-un: %i128_const_neg_1() == [0xffffffff_ffffffff, 0xffffffff_ffffffff]
+
+
+function %add_i128(i64, i64, i64, i64) -> i64, i64 {
+block0(v0: i64,v1: i64,v2: i64,v3: i64):
+    v4 = iconcat v0, v1
+    v5 = iconcat v2, v3
+
+    v6 = iadd v4, v5
+
+    v7, v8 = isplit v6
+    return v7, v8
+}
+; run: %add_i128(0, 0, 0, 0) == [0, 0]
+; run: %add_i128(0, -1, -1, 0) == [-1, -1]
+; run: %add_i128(1, 0, 0, 0) == [1, 0]
+; run: %add_i128(1, 0, 1, 0) == [2, 0]
+; run: %add_i128(1, 0, -1, -1) == [0, 0]
+; run: %add_i128(-1, 0, 1, 0) == [0, 1]
\ No newline at end of file
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
index 5fc88c97c7..91d4987d89 100644
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -425,3 +425,18 @@ block0(v0: i8x16):
 ; nextln: ushl v0.16b, v0.16b, v1.16b
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+
+
+function %add_i128(i128, i128) -> i128 {
+block0(v0: i128, v1: i128):
+    v2 = iadd v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: adds x0, x0, x2
+; nextln: adc x1, x1, x3
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
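
The adds/adc pair checked above is the whole of the 128-bit addition: adds produces the low 64 bits and records the unsigned carry-out in the flags, and adc folds that carry into the sum of the high 64 bits. A minimal Rust sketch of the same semantics on explicit (low, high) halves (illustrative only; the function and names below are not part of the patch):

// Sketch of what "adds x0, x0, x2" / "adc x1, x1, x3" compute, using
// (low, high) u64 pairs in place of the register pairs chosen by regalloc.
fn add_i128_by_halves(lhs: (u64, u64), rhs: (u64, u64)) -> (u64, u64) {
    // adds: add the low halves and capture the carry-out (the C flag).
    let (lo, carry) = lhs.0.overflowing_add(rhs.0);
    // adc: add the high halves plus the carry from the low half.
    let hi = lhs.1.wrapping_add(rhs.1).wrapping_add(carry as u64);
    (lo, hi)
}

For example, add_i128_by_halves((u64::MAX, 0), (1, 0)) yields (0, 1), matching the %add_i128(-1, 0, 1, 0) == [0, 1] case in the runtest file above.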