diff --git a/build.rs b/build.rs
index 42e152604a..04dd042853 100644
--- a/build.rs
+++ b/build.rs
@@ -228,6 +228,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 ("simd", "simd_i32x4_arith") => return false,
                 ("simd", "simd_i32x4_arith2") => return false,
                 ("simd", "simd_i32x4_cmp") => return false,
+                ("simd", "simd_lane") => return false,
                 ("simd", "simd_load_extend") => return false,
                 ("simd", "simd_load_splat") => return false,
                 ("simd", "simd_store") => return false,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 8b063d36c1..310db6ea68 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -378,6 +378,16 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn:
         | machreg_to_vec(rd.to_reg())
 }
 
+fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+    debug_assert_eq!(len & 0b11, len);
+    0b0_1_001110_000_00000_0_00_0_00_00000_00000
+        | (machreg_to_vec(rm) << 16)
+        | len << 13
+        | (is_extension as u32) << 12
+        | (machreg_to_vec(rn) << 5)
+        | machreg_to_vec(rd.to_reg())
+}
+
 fn enc_dmb_ish() -> u32 {
     0xD5033BBF
 }
@@ -1396,6 +1406,24 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
             }
+            &Inst::VecTbl {
+                rd,
+                rn,
+                rm,
+                is_extension,
+            } => {
+                sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
+            }
+            &Inst::VecTbl2 {
+                rd,
+                rn,
+                rn2,
+                rm,
+                is_extension,
+            } => {
+                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
+                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
+            }
             &Inst::FpuCmp32 { rn, rm } => {
                 sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
             }
@@ -1505,9 +1533,26 @@
                 };
                 sink.put4(enc_fround(top22, rd, rn));
             }
-            &Inst::MovToVec64 { rd, rn } => {
+            &Inst::MovToFpu { rd, rn } => {
                 sink.put4(
-                    0b010_01110000_01000_0_0011_1_00000_00000
+                    0b100_11110_01_1_00_111_000000_00000_00000
+                        | (machreg_to_gpr(rn) << 5)
+                        | machreg_to_vec(rd.to_reg()),
+                );
+            }
+            &Inst::MovToVec { rd, rn, idx, size } => {
+                let (imm5, shift) = match size.lane_size() {
+                    ScalarSize::Size8 => (0b00001, 1),
+                    ScalarSize::Size16 => (0b00010, 2),
+                    ScalarSize::Size32 => (0b00100, 3),
+                    ScalarSize::Size64 => (0b01000, 4),
+                    _ => unreachable!(),
+                };
+                debug_assert_eq!(idx & (0b11111 >> shift), idx);
+                let imm5 = imm5 | ((idx as u32) << shift);
+                sink.put4(
+                    0b010_01110000_00000_0_0011_1_00000_00000
+                        | (imm5 << 16)
                         | (machreg_to_gpr(rn) << 5)
                         | machreg_to_vec(rd.to_reg()),
                 );
@@ -1607,6 +1652,33 @@
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
+            &Inst::VecMovElement {
+                rd,
+                rn,
+                idx1,
+                idx2,
+                size,
+            } => {
+                let (imm5, shift) = match size.lane_size() {
+                    ScalarSize::Size8 => (0b00001, 1),
+                    ScalarSize::Size16 => (0b00010, 2),
+                    ScalarSize::Size32 => (0b00100, 3),
+                    ScalarSize::Size64 => (0b01000, 4),
+                    _ => unreachable!(),
+                };
+                let mask = 0b11111 >> shift;
+                debug_assert_eq!(idx1 & mask, idx1);
+                debug_assert_eq!(idx2 & mask, idx2);
+                let imm4 = (idx2 as u32) << (shift - 1);
+                let imm5 = imm5 | ((idx1 as u32) << shift);
+                sink.put4(
+                    0b011_01110000_00000_0_0000_1_00000_00000
+                        | (imm5 << 16)
+                        | (imm4 << 11)
+                        | (machreg_to_vec(rn) << 5)
+                        | machreg_to_vec(rd.to_reg()),
+                );
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
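
Note: as a quick cross-check of the TBL/TBX bit layout implemented by `enc_tbl` above, the encoding can be reproduced in a minimal standalone sketch (the `enc_tbl_demo` helper below is hypothetical and takes raw vector register numbers instead of `Reg` values; it is not part of the patch) and compared against the "tbl v0.16b, { v31.16b }, v16.16b" case from the emit tests that follow:

    fn enc_tbl_demo(is_extension: bool, len: u32, rd: u32, rn: u32, rm: u32) -> u32 {
        // Same field placement as enc_tbl: Rm at bit 16, len at bit 13, the
        // TBL/TBX selector at bit 12, Rn at bit 5 and Rd at bit 0.
        0b0_1_001110_000_00000_0_00_0_00_00000_00000
            | (rm << 16)
            | (len << 13)
            | ((is_extension as u32) << 12)
            | (rn << 5)
            | rd
    }

    fn main() {
        // tbl v0.16b, { v31.16b }, v16.16b encodes to 0x4E1003E0, which is
        // "E003104E" in the little-endian byte order used by the emit tests.
        let insn = enc_tbl_demo(false, 0b00, 0, 31, 16);
        assert_eq!(insn.to_le_bytes(), [0xE0, 0x03, 0x10, 0x4E]);
    }
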
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 5f00e3c7fd..1d6e2070b0 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -1829,9 +1829,29 @@ fn test_aarch64_binemit() {
         "ccmp w3, #30, #NZCV, gt",
     ));
     insns.push((
-        Inst::MovToVec64 {
+        Inst::MovToFpu {
+            rd: writable_vreg(31),
+            rn: xreg(0),
+        },
+        "1F00679E",
+        "fmov d31, x0",
+    ));
+    insns.push((
+        Inst::MovToVec {
+            rd: writable_vreg(0),
+            rn: xreg(0),
+            idx: 7,
+            size: VectorSize::Size8x8,
+        },
+        "001C0F4E",
+        "mov v0.b[7], w0",
+    ));
+    insns.push((
+        Inst::MovToVec {
             rd: writable_vreg(20),
             rn: xreg(21),
+            idx: 0,
+            size: VectorSize::Size64x2,
         },
         "B41E084E",
         "mov v20.d[0], x21",
@@ -2041,6 +2061,30 @@ fn test_aarch64_binemit() {
         "uxtl v28.2d, v2.2s",
     ));
 
+    insns.push((
+        Inst::VecMovElement {
+            rd: writable_vreg(0),
+            rn: vreg(31),
+            idx1: 7,
+            idx2: 7,
+            size: VectorSize::Size16x8,
+        },
+        "E0771E6E",
+        "mov v0.h[7], v31.h[7]",
+    ));
+
+    insns.push((
+        Inst::VecMovElement {
+            rd: writable_vreg(31),
+            rn: vreg(16),
+            idx1: 1,
+            idx2: 0,
+            size: VectorSize::Size32x2,
+        },
+        "1F060C6E",
+        "mov v31.s[1], v16.s[0]",
+    ));
+
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Sqadd,
@@ -3190,6 +3234,52 @@ fn test_aarch64_binemit() {
         "uminv s18, v4.4s",
     ));
 
+    insns.push((
+        Inst::VecTbl {
+            rd: writable_vreg(0),
+            rn: vreg(31),
+            rm: vreg(16),
+            is_extension: false,
+        },
+        "E003104E",
+        "tbl v0.16b, { v31.16b }, v16.16b",
+    ));
+
+    insns.push((
+        Inst::VecTbl {
+            rd: writable_vreg(4),
+            rn: vreg(12),
+            rm: vreg(23),
+            is_extension: true,
+        },
+        "8411174E",
+        "tbx v4.16b, { v12.16b }, v23.16b",
+    ));
+
+    insns.push((
+        Inst::VecTbl2 {
+            rd: writable_vreg(16),
+            rn: vreg(31),
+            rn2: vreg(0),
+            rm: vreg(26),
+            is_extension: false,
+        },
+        "F0231A4E",
+        "tbl v16.16b, { v31.16b, v0.16b }, v26.16b",
+    ));
+
+    insns.push((
+        Inst::VecTbl2 {
+            rd: writable_vreg(3),
+            rn: vreg(11),
+            rn2: vreg(12),
+            rm: vreg(19),
+            is_extension: true,
+        },
+        "6331134E",
+        "tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
+    ));
+
     insns.push((
         Inst::Extend {
             rd: writable_xreg(1),
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 489e20576e..949189e8d3 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -819,12 +819,20 @@ pub enum Inst {
         rn: Reg,
     },
 
-    /// Move to a vector register from a GPR.
-    MovToVec64 {
+    /// Move from a GPR to a scalar FP register.
+    MovToFpu {
         rd: Writable<Reg>,
         rn: Reg,
     },
 
+    /// Move to a vector element from a GPR.
+    MovToVec {
+        rd: Writable<Reg>,
+        rn: Reg,
+        idx: u8,
+        size: VectorSize,
+    },
+
     /// Unsigned move from a vector element to a GPR.
     MovFromVec {
         rd: Writable<Reg>,
@@ -863,6 +871,15 @@ pub enum Inst {
         rn: Reg,
     },
 
+    /// Move vector element to another vector element.
+    VecMovElement {
+        rd: Writable<Reg>,
+        rn: Reg,
+        idx1: u8,
+        idx2: u8,
+        size: VectorSize,
+    },
+
     /// A vector ALU op.
     VecRRR {
         alu_op: VecALUOp,
@@ -888,6 +905,32 @@ pub enum Inst {
         size: VectorSize,
     },
 
+    /// Table vector lookup - single register table. The table consists of 8-bit elements and is
+    /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
+    /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
+    /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
+    /// to 0.
+    VecTbl {
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+        is_extension: bool,
+    },
+
+    /// Table vector lookup - two register table. The table consists of 8-bit elements and is
+    /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
+    /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
+    /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
+    /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
+    /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+    VecTbl2 {
+        rd: Writable<Reg>,
+        rn: Reg,
+        rn2: Reg,
+        rm: Reg,
+        is_extension: bool,
+    },
+
     /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
     MovToNZCV {
         rn: Reg,
@@ -1377,6 +1420,39 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::VecTbl {
+            rd,
+            rn,
+            rm,
+            is_extension,
+        } => {
+            collector.add_use(rn);
+            collector.add_use(rm);
+
+            if is_extension {
+                collector.add_mod(rd);
+            } else {
+                collector.add_def(rd);
+            }
+        }
+        &Inst::VecTbl2 {
+            rd,
+            rn,
+            rn2,
+            rm,
+            is_extension,
+        } => {
+            collector.add_use(rn);
+            collector.add_use(rn2);
+            collector.add_use(rm);
+
+            if is_extension {
+                collector.add_mod(rd);
+            } else {
+                collector.add_def(rd);
+            }
+        }
         &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
             collector.add_use(rn);
             collector.add_use(rm);
@@ -1427,10 +1503,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
            collector.add_use(rn);
         }
-        &Inst::MovToVec64 { rd, rn } => {
+        &Inst::MovToFpu { rd, rn } => {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::MovToVec { rd, rn, .. } => {
+            collector.add_mod(rd);
+            collector.add_use(rn);
+        }
         &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
             collector.add_def(rd);
             collector.add_use(rn);
@@ -1447,6 +1527,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::VecMovElement { rd, rn, .. } => {
+            collector.add_mod(rd);
+            collector.add_use(rn);
+        }
         &Inst::VecRRR {
             alu_op, rd, rn, rm, ..
         } => {
@@ -1905,6 +1989,38 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_def(mapper, rd);
             map_use(mapper, rn);
         }
+        &mut Inst::VecTbl {
+            ref mut rd,
+            ref mut rn,
+            ref mut rm,
+            is_extension,
+        } => {
+            map_use(mapper, rn);
+            map_use(mapper, rm);
+
+            if is_extension {
+                map_mod(mapper, rd);
+            } else {
+                map_def(mapper, rd);
+            }
+        }
+        &mut Inst::VecTbl2 {
+            ref mut rd,
+            ref mut rn,
+            ref mut rn2,
+            ref mut rm,
+            is_extension,
+        } => {
+            map_use(mapper, rn);
+            map_use(mapper, rn2);
+            map_use(mapper, rm);
+
+            if is_extension {
+                map_mod(mapper, rd);
+            } else {
+                map_def(mapper, rd);
+            }
+        }
         &mut Inst::FpuCmp32 {
             ref mut rn,
             ref mut rm,
@@ -2020,13 +2136,21 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_def(mapper, rd);
             map_use(mapper, rn);
         }
-        &mut Inst::MovToVec64 {
+        &mut Inst::MovToFpu {
             ref mut rd,
             ref mut rn,
         } => {
             map_def(mapper, rd);
             map_use(mapper, rn);
         }
+        &mut Inst::MovToVec {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_mod(mapper, rd);
+            map_use(mapper, rn);
+        }
         &mut Inst::MovFromVec {
             ref mut rd,
             ref mut rn,
@@ -2064,6 +2188,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_def(mapper, rd);
             map_use(mapper, rn);
         }
+        &mut Inst::VecMovElement {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_mod(mapper, rd);
+            map_use(mapper, rn);
+        }
         &mut Inst::VecRRR {
             alu_op,
             ref mut rd,
@@ -2871,10 +3003,15 @@ impl Inst {
                 let rn = show_vreg_scalar(rn, mb_rru, size);
                 format!("{} {}, {}", inst, rd, rn)
             }
-            &Inst::MovToVec64 { rd, rn } => {
-                let rd = rd.to_reg().show_rru(mb_rru);
-                let rn = rn.show_rru(mb_rru);
-                format!("mov {}.d[0], {}", rd, rn)
+            &Inst::MovToFpu { rd, rn } => {
+                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+                let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size64);
+                format!("fmov {}, {}", rd, rn)
+            }
+            &Inst::MovToVec { rd, rn, idx, size } => {
+                let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
+                let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+                format!("mov {}, {}", rd, rn)
             }
             &Inst::MovFromVec { rd, rn, idx, size } => {
                 let op = match size {
@@ -2922,6 +3059,17 @@ impl Inst {
                 let rn = show_vreg_vector(rn, mb_rru, src);
                 format!("{} {}, {}", op, rd, rn)
             }
+            &Inst::VecMovElement {
+                rd,
+                rn,
+                idx1,
+                idx2,
+                size,
+            } => {
+                let rd = show_vreg_element(rd.to_reg(), mb_rru, idx1, size);
+                let rn = show_vreg_element(rn, mb_rru, idx2, size);
+                format!("mov {}, {}", rd, rn)
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
@@ -2992,6 +3140,32 @@ impl Inst {
                 let rn = show_vreg_vector(rn, mb_rru, size);
                 format!("{} {}, {}", op, rd, rn)
             }
+            &Inst::VecTbl {
+                rd,
+                rn,
+                rm,
+                is_extension,
+            } => {
+                let op = if is_extension { "tbx" } else { "tbl" };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+                let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+                format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
+            }
+            &Inst::VecTbl2 {
+                rd,
+                rn,
+                rn2,
+                rm,
+                is_extension,
+            } => {
+                let op = if is_extension { "tbx" } else { "tbl" };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+                let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
+                let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+                format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
+            }
             &Inst::MovToNZCV { rn } => {
                 let rn = rn.show_rru(mb_rru);
                 format!("msr nzcv, {}", rn)
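
Note: the TBL/TBX semantics described in the `VecTbl`/`VecTbl2` doc comments above can be modelled on plain byte arrays; a minimal sketch (assuming the table is the concatenation of one or two 16-byte source vectors):

    // For each lane, an in-range index selects a table byte. An out-of-range
    // index zeroes the lane for TBL, but leaves the destination lane
    // unmodified for TBX (`is_extension == true`).
    fn tbl_model(dst: &mut [u8; 16], table: &[u8], indices: &[u8; 16], is_extension: bool) {
        for (d, &i) in dst.iter_mut().zip(indices.iter()) {
            match table.get(usize::from(i)) {
                Some(&b) => *d = b,
                None if is_extension => {} // TBX: keep the old byte
                None => *d = 0,            // TBL: zero the lane
            }
        }
    }

The read-modify-write behaviour of TBX is also why the register usage code above registers `rd` with `add_mod`/`map_mod` rather than `add_def`/`map_def` when `is_extension` is set.
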
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 076145d6d6..4ec0871eb1 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -142,28 +142,26 @@ pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
     input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
 }
 
-pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
+pub(crate) fn const_param_to_u128<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
-    out: InsnOutput,
+    inst: IRInst,
 ) -> Option<u128> {
-    if out.output > 0 {
-        None
-    } else {
-        let inst_data = ctx.data(out.insn);
+    let data = match ctx.data(inst) {
+        &InstructionData::Shuffle { mask, .. } => ctx.get_immediate(mask),
+        &InstructionData::UnaryConst {
+            constant_handle, ..
+        } => ctx.get_constant_data(constant_handle),
+        _ => return None,
+    };
+    let data = data.clone().into_vec();
 
-        match inst_data {
-            &InstructionData::UnaryConst {
-                opcode: _,
-                constant_handle,
-            } => {
-                let mut bytes = [0u8; 16];
-                let c = ctx.get_constant_data(constant_handle).clone().into_vec();
-                assert_eq!(c.len(), 16);
-                bytes.copy_from_slice(&c);
-                Some(u128::from_le_bytes(bytes))
-            }
-            _ => None,
-        }
+    if data.len() == 16 {
+        let mut bytes = [0u8; 16];
+
+        bytes.copy_from_slice(&data);
+        Some(u128::from_le_bytes(bytes))
+    } else {
+        None
     }
 }
@@ -1016,7 +1014,8 @@ pub fn ty_bits(ty: Type) -> usize {
 pub(crate) fn ty_is_int(ty: Type) -> bool {
     match ty {
         B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true,
-        F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
+        F32 | F64 | B128 | F32X2 | F32X4 | F64X2 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2
+        | I32X4 | I64X2 => false,
         IFLAGS | FFLAGS => panic!("Unexpected flags type"),
         _ => panic!("ty_is_int() on unknown type: {:?}", ty),
     }
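
Note: `const_param_to_u128` above only reinterprets a 16-byte immediate (a `shuffle` mask or a `vconst` payload) as a little-endian `u128`; a standalone illustration of that conversion (the `mask_to_u128` helper is hypothetical, mirroring the length check in the patch):

    fn mask_to_u128(data: &[u8]) -> Option<u128> {
        if data.len() == 16 {
            let mut bytes = [0u8; 16];
            bytes.copy_from_slice(data);
            Some(u128::from_le_bytes(bytes))
        } else {
            None
        }
    }

    fn main() {
        // The identity shuffle mask 0, 1, ..., 15: byte 0 lands in the least
        // significant position of the u128.
        let mask: Vec<u8> = (0u8..16).collect();
        assert_eq!(
            mask_to_u128(&mask),
            Some(0x0f0e0d0c_0b0a0908_07060504_03020100)
        );
    }
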
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index aae8b2e607..25dc268dec 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -141,8 +141,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let vb = ctx.alloc_tmp(RegClass::V128, I128);
             let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
             let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
-            ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
-            ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
+            ctx.emit(Inst::MovToFpu { rd: va, rn: ra });
+            ctx.emit(Inst::MovToFpu { rd: vb, rn: rb });
             ctx.emit(Inst::FpuRRR {
                 fpu_op,
                 rd: va,
@@ -1537,7 +1537,7 @@
                 }
                 (false, true) => {
                     let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
-                    ctx.emit(Inst::MovToVec64 { rd, rn });
+                    ctx.emit(Inst::MovToFpu { rd, rn });
                 }
                 (true, false) => {
                     let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -1789,7 +1789,7 @@
         }
 
         Opcode::Vconst => {
-            let value = output_to_const_f128(ctx, outputs[0]).unwrap();
+            let value = const_param_to_u128(ctx, insn).expect("Invalid immediate bytes");
             let rd = get_output_reg(ctx, outputs[0]);
             lower_constant_f128(ctx, rd, value);
         }
@@ -1822,6 +1822,34 @@
             }
         }
 
+        Opcode::Insertlane => {
+            let idx = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
+                *imm
+            } else {
+                unreachable!();
+            };
+            let input_ty = ctx.input_ty(insn, 1);
+            let rd = get_output_reg(ctx, outputs[0]);
+            let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let ty = ty.unwrap();
+            let size = VectorSize::from_ty(ty);
+
+            ctx.emit(Inst::gen_move(rd, rm, ty));
+
+            if ty_is_int(input_ty) {
+                ctx.emit(Inst::MovToVec { rd, rn, idx, size });
+            } else {
+                ctx.emit(Inst::VecMovElement {
+                    rd,
+                    rn,
+                    idx1: idx,
+                    idx2: 0,
+                    size,
+                });
+            }
+        }
+
         Opcode::Splat => {
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);
@@ -1885,12 +1913,51 @@
             normalize_bool_result(ctx, insn, rd);
         }
 
-        Opcode::Shuffle
-        | Opcode::Vsplit
+        Opcode::Shuffle => {
+            let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
+            let rd = get_output_reg(ctx, outputs[0]);
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            // 2 register table vector lookups require consecutive table registers;
+            // we satisfy this constraint by hardcoding the usage of v29 and v30.
+            let temp = writable_vreg(29);
+            let temp2 = writable_vreg(30);
+            let input_ty = ctx.input_ty(insn, 0);
+            assert_eq!(input_ty, ctx.input_ty(insn, 1));
+            // Make sure that both inputs are in virtual registers, since it is
+            // not guaranteed that we can get them safely to the temporaries if
+            // either is in a real register.
+            let rn = ctx.ensure_in_vreg(rn, input_ty);
+            let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
+
+            lower_constant_f128(ctx, rd, mask);
+            ctx.emit(Inst::gen_move(temp, rn, input_ty));
+            ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
+            ctx.emit(Inst::VecTbl2 {
+                rd,
+                rn: temp.to_reg(),
+                rn2: temp2.to_reg(),
+                rm: rd.to_reg(),
+                is_extension: false,
+            });
+        }
+
+        Opcode::Swizzle => {
+            let rd = get_output_reg(ctx, outputs[0]);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+            ctx.emit(Inst::VecTbl {
+                rd,
+                rn,
+                rm,
+                is_extension: false,
+            });
+        }
+
+        Opcode::Vsplit
         | Opcode::Vconcat
-        | Opcode::Insertlane
         | Opcode::ScalarToVector
-        | Opcode::Swizzle
         | Opcode::Uload8x8Complex
         | Opcode::Sload8x8Complex
         | Opcode::Uload16x4Complex
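
Note: the `Shuffle` lowering above concatenates its two inputs into a single 32-byte table (moved into the consecutive pair v29/v30) and indexes it with the mask via `VecTbl2`; a scalar model of the resulting semantics (a sketch of the behaviour, not the lowering itself):

    fn shuffle_model(a: [u8; 16], b: [u8; 16], mask: [u8; 16]) -> [u8; 16] {
        // The two inputs form one 32-byte table, matching the consecutive
        // v29/v30 register pair required by the two-register TBL.
        let mut table = [0u8; 32];
        table[..16].copy_from_slice(&a);
        table[16..].copy_from_slice(&b);

        let mut out = [0u8; 16];
        for (o, &m) in out.iter_mut().zip(mask.iter()) {
            // Out-of-range mask bytes (>= 32) produce 0, per TBL semantics.
            *o = table.get(usize::from(m)).copied().unwrap_or(0);
        }
        out
    }

`Swizzle` is the single-table special case and maps directly to a one-register `VecTbl` with no temporaries.
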
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index d1b08e4d27..08dffacc00 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -8,8 +8,9 @@ use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
 use crate::ir::instructions::BranchInfo;
 use crate::ir::types::I64;
 use crate::ir::{
-    ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
-    InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
+    ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData,
+    Immediate, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
+    ValueDef,
 };
 use crate::machinst::{
     ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
@@ -160,6 +161,8 @@
     fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
     /// Retrieve constant data given a handle.
     fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
+    /// Retrieve an immediate given a reference.
+    fn get_immediate(&self, imm: Immediate) -> &ConstantData;
     /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
     /// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
     fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
@@ -997,6 +1000,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
         self.f.dfg.constants.get(constant_handle)
     }
 
+    fn get_immediate(&self, imm: Immediate) -> &ConstantData {
+        self.f.dfg.immediates.get(imm).unwrap()
+    }
+
     fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
         if reg.is_virtual() {
             reg
diff --git a/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif b/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif
index 0c1b97809c..7116205dd5 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif
@@ -9,8 +9,8 @@ block0(v0: i64, v1: i64):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: mov v0.d[0], x0
-; nextln: mov v1.d[0], x1
+; nextln: fmov d0, x0
+; nextln: fmov d1, x1
 ; nextln: uqadd d0, d0, d1
 ; nextln: mov x0, v0.d[0]
 ; nextln: mov sp, fp
@@ -27,8 +27,8 @@ block0(v0: i8, v1: i8):
 ; nextln: mov fp, sp
 ; nextln: uxtb x0, w0
 ; nextln: uxtb x1, w1
-; nextln: mov v0.d[0], x0
-; nextln: mov v1.d[0], x1
+; nextln: fmov d0, x0
+; nextln: fmov d1, x1
 ; nextln: uqadd d0, d0, d1
 ; nextln: mov x0, v0.d[0]
 ; nextln: mov sp, fp