From 7bd96c8e2f148b33e61feed0320a98c4deec0426 Mon Sep 17 00:00:00 2001 From: Kasey Carrothers Date: Sun, 31 Jan 2021 17:40:04 -0800 Subject: [PATCH] Refactor x64::Insts that use an is_64 bool to use OperandSize. --- cranelift/codegen/src/isa/x64/abi.rs | 12 +- cranelift/codegen/src/isa/x64/inst/args.rs | 9 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 77 +++-- .../codegen/src/isa/x64/inst/emit_tests.rs | 256 +++++++++++---- cranelift/codegen/src/isa/x64/inst/mod.rs | 218 ++++++------- cranelift/codegen/src/isa/x64/inst/unwind.rs | 6 +- cranelift/codegen/src/isa/x64/lower.rs | 300 ++++++++++-------- 7 files changed, 541 insertions(+), 337 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 98f3c17c3b..cf106ea4e5 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -329,7 +329,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { ret.push(Inst::gen_move(into_reg, from_reg, I64)); } ret.push(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(imm), into_reg, @@ -388,7 +388,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { let amount = amount as u32; smallvec![Inst::alu_rmi_r( - true, + OperandSize::Size64, alu_op, RegMemImm::imm(amount), Writable::from_reg(regs::rsp()), @@ -409,14 +409,14 @@ impl ABIMachineSpec for X64ABIMachineSpec { // RSP before the call will be 0 % 16. So here, it is 8 % 16. insts.push(Inst::push64(RegMemImm::reg(r_rbp))); // RSP is now 0 % 16 - insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); + insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp)); insts } fn gen_epilogue_frame_restore() -> SmallInstVec { let mut insts = SmallVec::new(); insts.push(Inst::mov_r_r( - true, + OperandSize::Size64, regs::rbp(), Writable::from_reg(regs::rsp()), )); @@ -461,7 +461,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { // instruction. if stack_size > 0 { insts.push(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::imm(stack_size), Writable::from_reg(regs::rsp()), @@ -520,7 +520,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { // Adjust RSP back upward. if stack_size > 0 { insts.push(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(stack_size), Writable::from_reg(regs::rsp()), diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 5c16498db0..930839459b 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1347,13 +1347,8 @@ impl OperandSize { } // Check that the value of self is one of the allowed sizes. - pub(crate) fn is_size(&self, sizes: &[Self]) -> bool { - for val in sizes.iter() { - if *self == *val { - return true; - } - } - false + pub(crate) fn is_one_of(&self, sizes: &[Self]) -> bool { + sizes.iter().any(|val| *self == *val) } pub(crate) fn to_bytes(&self) -> u8 { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 02570ef249..828337c128 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -541,12 +541,12 @@ pub(crate) fn emit( match inst { Inst::AluRmiR { - is_64, + size, op, src, dst: reg_g, } => { - let mut rex = if *is_64 { + let mut rex = if *size == OperandSize::Size64 { RexFlags::set_w() } else { RexFlags::clear_w() @@ -612,7 +612,7 @@ pub(crate) fn emit( AluRmiROpcode::Or8 => (0x08, 0x0A, 1, true), AluRmiROpcode::Mul => panic!("unreachable"), }; - assert!(!(is_8bit && *is_64)); + assert!(!(is_8bit && *size == OperandSize::Size64)); match src { RegMemImm::Reg { reg: reg_e } => { @@ -960,12 +960,12 @@ pub(crate) fn emit( } Inst::Imm { - dst_is_64, + dst_size, simm64, dst, } => { let enc_dst = int_reg_enc(dst.to_reg()); - if *dst_is_64 { + if *dst_size == OperandSize::Size64 { if low32_will_sign_extend_to_64(*simm64) { // Sign-extended move imm32. emit_std_enc_enc( @@ -992,8 +992,8 @@ pub(crate) fn emit( } } - Inst::MovRR { is_64, src, dst } => { - let rex = if *is_64 { + Inst::MovRR { size, src, dst } => { + let rex = if *size == OperandSize::Size64 { RexFlags::set_w() } else { RexFlags::clear_w() @@ -1495,12 +1495,7 @@ pub(crate) fn emit( } } - Inst::XmmCmove { - is_64, - cc, - src, - dst, - } => { + Inst::XmmCmove { size, cc, src, dst } => { // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that // this doesn't clobber flags. Make sure to not do so here. let next = sink.get_label(); @@ -1508,7 +1503,7 @@ pub(crate) fn emit( // Jump if cc is *not* set. one_way_jmp(sink, cc.invert(), next); - let op = if *is_64 { + let op = if *size == OperandSize::Size64 { SseOpcode::Movsd } else { SseOpcode::Movss @@ -1774,7 +1769,7 @@ pub(crate) fn emit( // Add base of jump table to jump-table-sourced block offset. let inst = Inst::alu_rmi_r( - true, /* is_64 */ + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp2.to_reg()), *tmp1, @@ -2094,7 +2089,7 @@ pub(crate) fn emit( src, dst, imm, - is64, + size, } => { let (prefix, opcode, len) = match op { SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), @@ -2116,7 +2111,7 @@ pub(crate) fn emit( SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), _ => unimplemented!("Opcode {:?} not implemented", op), }; - let rex = if *is64 { + let rex = if *size == OperandSize::Size64 { RexFlags::set_w() } else { RexFlags::clear_w() @@ -2289,7 +2284,7 @@ pub(crate) fn emit( } Inst::CvtUint64ToFloatSeq { - to_f64, + dst_size, src, dst, tmp_gpr1, @@ -2336,7 +2331,14 @@ pub(crate) fn emit( // Handle a positive int64, which is the "easy" case: a signed conversion will do the // right thing. - emit_signed_cvt(sink, info, state, src.to_reg(), *dst, *to_f64); + emit_signed_cvt( + sink, + info, + state, + src.to_reg(), + *dst, + *dst_size == OperandSize::Size64, + ); let inst = Inst::jmp_known(done); inst.emit(sink, info, state); @@ -2361,7 +2363,7 @@ pub(crate) fn emit( inst.emit(sink, info, state); let inst = Inst::alu_rmi_r( - true, /* 64bits */ + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), *tmp_gpr2, @@ -2369,16 +2371,23 @@ pub(crate) fn emit( inst.emit(sink, info, state); let inst = Inst::alu_rmi_r( - true, /* 64bits */ + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp_gpr1.to_reg()), *tmp_gpr2, ); inst.emit(sink, info, state); - emit_signed_cvt(sink, info, state, tmp_gpr2.to_reg(), *dst, *to_f64); + emit_signed_cvt( + sink, + info, + state, + tmp_gpr2.to_reg(), + *dst, + *dst_size == OperandSize::Size64, + ); - let add_op = if *to_f64 { + let add_op = if *dst_size == OperandSize::Size64 { SseOpcode::Addsd } else { SseOpcode::Addss @@ -2475,7 +2484,7 @@ pub(crate) fn emit( if *is_saturating { // For NaN, emit 0. let inst = Inst::alu_rmi_r( - *dst_size == OperandSize::Size64, + *dst_size, AluRmiROpcode::Xor, RegMemImm::reg(dst.to_reg()), *dst, @@ -2666,7 +2675,7 @@ pub(crate) fn emit( if *is_saturating { // Emit 0. let inst = Inst::alu_rmi_r( - *dst_size == OperandSize::Size64, + *dst_size, AluRmiROpcode::Xor, RegMemImm::reg(dst.to_reg()), *dst, @@ -2698,7 +2707,7 @@ pub(crate) fn emit( // The input was "small" (< 2**(width -1)), so the only way to get an integer // overflow is because the input was too small: saturate to the min value, i.e. 0. let inst = Inst::alu_rmi_r( - *dst_size == OperandSize::Size64, + *dst_size, AluRmiROpcode::Xor, RegMemImm::reg(dst.to_reg()), *dst, @@ -2757,15 +2766,19 @@ pub(crate) fn emit( inst.emit(sink, info, state); let inst = Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp_gpr.to_reg()), *dst, ); inst.emit(sink, info, state); } else { - let inst = - Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), *dst); + let inst = Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Add, + RegMemImm::imm(1 << 31), + *dst, + ); inst.emit(sink, info, state); } @@ -2865,13 +2878,13 @@ pub(crate) fn emit( sink.bind_label(again_label); // movq %rax, %r11 - let i2 = Inst::mov_r_r(true, rax, r11_w); + let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w); i2.emit(sink, info, state); // opq %r10, %r11 let r10_rmi = RegMemImm::reg(r10); let i3 = if *op == inst_common::AtomicRmwOp::Xchg { - Inst::mov_r_r(true, r10, r11_w) + Inst::mov_r_r(OperandSize::Size64, r10, r11_w) } else { let alu_op = match op { inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add, @@ -2881,7 +2894,7 @@ pub(crate) fn emit( inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor, inst_common::AtomicRmwOp::Xchg => unreachable!(), }; - Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w) + Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w) }; i3.emit(sink, info, state); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 0fbd3a7429..b3394f096c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -885,23 +885,38 @@ fn test_x64_emit() { // // Alu_RMI_R insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Add, + RegMemImm::reg(r15), + w_rdx, + ), "4C01FA", "addq %r15, %rdx", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_r8), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Add, + RegMemImm::reg(rcx), + w_r8, + ), "4101C8", "addl %ecx, %r8d", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Add, + RegMemImm::reg(rcx), + w_rsi, + ), "01CE", "addl %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rdx, @@ -911,7 +926,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_r8, @@ -921,7 +936,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rsi, @@ -931,7 +946,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(-127i32 as u32), w_rdx, @@ -941,7 +956,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(-129i32 as u32), w_rdx, @@ -950,13 +965,18 @@ fn test_x64_emit() { "addq $-129, %rdx", )); insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Add, + RegMemImm::imm(76543210), + w_rdx, + ), "4881C2EAF48F04", "addq $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(-127i32 as u32), w_r8, @@ -966,7 +986,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(-129i32 as u32), w_r8, @@ -976,7 +996,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(-76543210i32 as u32), w_r8, @@ -986,7 +1006,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(-127i32 as u32), w_rsi, @@ -996,7 +1016,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(-129i32 as u32), w_rsi, @@ -1005,44 +1025,79 @@ fn test_x64_emit() { "addl $-129, %esi", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Add, + RegMemImm::imm(76543210), + w_rsi, + ), "81C6EAF48F04", "addl $76543210, %esi", )); // This is pretty feeble insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Sub, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Sub, + RegMemImm::reg(r15), + w_rdx, + ), "4C29FA", "subq %r15, %rdx", )); insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::And, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::And, + RegMemImm::reg(r15), + w_rdx, + ), "4C21FA", "andq %r15, %rdx", )); insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Or, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Or, + RegMemImm::reg(r15), + w_rdx, + ), "4C09FA", "orq %r15, %rdx", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::And8, + RegMemImm::reg(r15), + w_rdx, + ), "4420FA", "andb %r15b, %dl", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::And8, + RegMemImm::reg(rax), + w_rsi, + ), "4020C6", "andb %al, %sil", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rbx), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::And8, + RegMemImm::reg(rax), + w_rbx, + ), "20C3", "andb %al, %bl", )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::And8, RegMemImm::mem(Amode::imm_reg(0, rax)), w_rbx, @@ -1051,23 +1106,38 @@ fn test_x64_emit() { "andb 0(%rax), %bl", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Or8, + RegMemImm::reg(r15), + w_rdx, + ), "4408FA", "orb %r15b, %dl", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Or8, + RegMemImm::reg(rax), + w_rsi, + ), "4008C6", "orb %al, %sil", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rbx), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Or8, + RegMemImm::reg(rax), + w_rbx, + ), "08C3", "orb %al, %bl", )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Or8, RegMemImm::mem(Amode::imm_reg(0, rax)), w_rbx, @@ -1076,29 +1146,49 @@ fn test_x64_emit() { "orb 0(%rax), %bl", )); insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Xor, + RegMemImm::reg(r15), + w_rdx, + ), "4C31FA", "xorq %r15, %rdx", )); // Test all mul cases, though insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::reg(r15), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Mul, + RegMemImm::reg(r15), + w_rdx, + ), "490FAFD7", "imulq %r15, %rdx", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_r8), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Mul, + RegMemImm::reg(rcx), + w_r8, + ), "440FAFC1", "imull %ecx, %r8d", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Mul, + RegMemImm::reg(rcx), + w_rsi, + ), "0FAFF1", "imull %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rdx, @@ -1108,7 +1198,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_r8, @@ -1118,7 +1208,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::mem(Amode::imm_reg(99, rdi)), w_rsi, @@ -1128,7 +1218,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::imm(-127i32 as u32), w_rdx, @@ -1138,7 +1228,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::imm(-129i32 as u32), w_rdx, @@ -1147,13 +1237,18 @@ fn test_x64_emit() { "imulq $-129, %rdx", )); insns.push(( - Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rdx), + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Mul, + RegMemImm::imm(76543210), + w_rdx, + ), "4869D2EAF48F04", "imulq $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(-127i32 as u32), w_r8, @@ -1163,7 +1258,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(-129i32 as u32), w_r8, @@ -1173,7 +1268,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(-76543210i32 as u32), w_r8, @@ -1183,7 +1278,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(-127i32 as u32), w_rsi, @@ -1193,7 +1288,7 @@ fn test_x64_emit() { )); insns.push(( Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(-129i32 as u32), w_rsi, @@ -1202,7 +1297,12 @@ fn test_x64_emit() { "imull $-129, %esi", )); insns.push(( - Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rsi), + Inst::alu_rmi_r( + OperandSize::Size32, + AluRmiROpcode::Mul, + RegMemImm::imm(76543210), + w_rsi, + ), "69F6EAF48F04", "imull $76543210, %esi", )); @@ -1442,42 +1542,42 @@ fn test_x64_emit() { // ======================================================== // Mov_R_R insns.push(( - Inst::mov_r_r(false, rbx, w_rsi), + Inst::mov_r_r(OperandSize::Size32, rbx, w_rsi), "89DE", "movl %ebx, %esi", )); insns.push(( - Inst::mov_r_r(false, rbx, w_r9), + Inst::mov_r_r(OperandSize::Size32, rbx, w_r9), "4189D9", "movl %ebx, %r9d", )); insns.push(( - Inst::mov_r_r(false, r11, w_rsi), + Inst::mov_r_r(OperandSize::Size32, r11, w_rsi), "4489DE", "movl %r11d, %esi", )); insns.push(( - Inst::mov_r_r(false, r12, w_r9), + Inst::mov_r_r(OperandSize::Size32, r12, w_r9), "4589E1", "movl %r12d, %r9d", )); insns.push(( - Inst::mov_r_r(true, rbx, w_rsi), + Inst::mov_r_r(OperandSize::Size64, rbx, w_rsi), "4889DE", "movq %rbx, %rsi", )); insns.push(( - Inst::mov_r_r(true, rbx, w_r9), + Inst::mov_r_r(OperandSize::Size64, rbx, w_r9), "4989D9", "movq %rbx, %r9", )); insns.push(( - Inst::mov_r_r(true, r11, w_rsi), + Inst::mov_r_r(OperandSize::Size64, r11, w_rsi), "4C89DE", "movq %r11, %rsi", )); insns.push(( - Inst::mov_r_r(true, r12, w_r9), + Inst::mov_r_r(OperandSize::Size64, r12, w_r9), "4D89E1", "movq %r12, %r9", )); @@ -3879,38 +3979,80 @@ fn test_x64_emit() { // ======================================================== // XmmRmRImm insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false), + Inst::xmm_rm_r_imm( + SseOpcode::Cmppd, + RegMem::reg(xmm5), + w_xmm1, + 2, + OperandSize::Size32, + ), "660FC2CD02", "cmppd $2, %xmm5, %xmm1", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false), + Inst::xmm_rm_r_imm( + SseOpcode::Cmpps, + RegMem::reg(xmm15), + w_xmm7, + 0, + OperandSize::Size32, + ), "410FC2FF00", "cmpps $0, %xmm15, %xmm7", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Palignr, RegMem::reg(xmm1), w_xmm9, 3, false), + Inst::xmm_rm_r_imm( + SseOpcode::Palignr, + RegMem::reg(xmm1), + w_xmm9, + 3, + OperandSize::Size32, + ), "66440F3A0FC903", "palignr $3, %xmm1, %xmm9", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3, false), + Inst::xmm_rm_r_imm( + SseOpcode::Roundps, + RegMem::reg(xmm7), + w_xmm8, + 3, + OperandSize::Size32, + ), "66440F3A08C703", "roundps $3, %xmm7, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2, false), + Inst::xmm_rm_r_imm( + SseOpcode::Roundpd, + RegMem::reg(xmm10), + w_xmm7, + 2, + OperandSize::Size32, + ), "66410F3A09FA02", "roundpd $2, %xmm10, %xmm7", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1, false), + Inst::xmm_rm_r_imm( + SseOpcode::Roundps, + RegMem::reg(xmm4), + w_xmm8, + 1, + OperandSize::Size32, + ), "66440F3A08C401", "roundps $1, %xmm4, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0, false), + Inst::xmm_rm_r_imm( + SseOpcode::Roundpd, + RegMem::reg(xmm15), + w_xmm15, + 0, + OperandSize::Size32, + ), "66450F3A09FF00", "roundpd $0, %xmm15, %xmm15", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 11e2244d35..d3b80b1c40 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -42,7 +42,7 @@ pub enum Inst { // Integer instructions. /// Integer arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg AluRmiR { - is_64: bool, + size: OperandSize, // 4 or 8 op: AluRmiROpcode, src: RegMemImm, dst: Writable, @@ -111,14 +111,14 @@ pub enum Inst { /// Constant materialization: (imm32 imm64) reg. /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32. Imm { - dst_is_64: bool, + dst_size: OperandSize, // 4 or 8 simm64: u64, dst: Writable, }, /// GPR to GPR move: mov (64 32) reg reg. MovRR { - is_64: bool, + size: OperandSize, // 4 or 8 src: Reg, dst: Writable, }, @@ -255,8 +255,7 @@ pub enum Inst { /// Converts an unsigned int64 to a float32/float64. CvtUint64ToFloatSeq { - /// Is the target a 64-bits or 32-bits register? - to_f64: bool, + dst_size: OperandSize, // 4 or 8 /// A copy of the source register, fed by lowering. It is marked as modified during /// register allocation to make sure that the temporary registers differ from the src /// register, since both registers are live at the same time in the generated code @@ -308,8 +307,7 @@ pub enum Inst { /// XMM (scalar) conditional move. /// Overwrites the destination register if cc is set. XmmCmove { - /// Whether the cmove is moving either 32 or 64 bits. - is_64: bool, + size: OperandSize, // 4 or 8 cc: CC, src: RegMem, dst: Writable, @@ -328,7 +326,7 @@ pub enum Inst { src: RegMem, dst: Writable, imm: u8, - is64: bool, + size: OperandSize, // 4 or 8 }, // ===================================== @@ -575,19 +573,15 @@ impl Inst { } pub(crate) fn alu_rmi_r( - is_64: bool, + size: OperandSize, op: AluRmiROpcode, src: RegMemImm, dst: Writable, ) -> Self { + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Self::AluRmiR { - is_64, - op, - src, - dst, - } + Self::AluRmiR { size, op, src, dst } } pub(crate) fn unary_rm_r( @@ -598,7 +592,7 @@ impl Inst { ) -> Self { src.assert_regclass_is(RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); - debug_assert!(size.is_size(&[ + debug_assert!(size.is_one_of(&[ OperandSize::Size16, OperandSize::Size32, OperandSize::Size64 @@ -626,7 +620,7 @@ impl Inst { } pub(crate) fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst { - debug_assert!(size.is_size(&[ + debug_assert!(size.is_one_of(&[ OperandSize::Size16, OperandSize::Size32, OperandSize::Size64 @@ -657,22 +651,27 @@ impl Inst { Inst::SignExtendData { size } } - pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable) -> Inst { + pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable) -> Inst { + debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst.to_reg().get_class() == RegClass::I64); // Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches // the semantics of movl). - let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64; + let dst_size = match dst_size { + OperandSize::Size64 if simm64 > u32::max_value() as u64 => OperandSize::Size64, + _ => OperandSize::Size32, + }; Inst::Imm { - dst_is_64, + dst_size, simm64, dst, } } - pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable) -> Inst { + pub(crate) fn mov_r_r(size: OperandSize, src: Reg, dst: Writable) -> Inst { + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(src.get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::MovRR { is_64, src, dst } + Inst::MovRR { size, src, dst } } // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level) @@ -723,7 +722,7 @@ impl Inst { ) -> Inst { debug_assert!(src.get_class() == RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::I64); - debug_assert!(dst_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); Inst::XmmToGpr { op, src, @@ -739,7 +738,7 @@ impl Inst { dst: Writable, ) -> Inst { src.assert_regclass_is(RegClass::I64); - debug_assert!(src_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst.to_reg().get_class() == RegClass::V128); Inst::GprToXmm { op, @@ -756,12 +755,13 @@ impl Inst { } pub(crate) fn cvt_u64_to_float_seq( - to_f64: bool, + dst_size: OperandSize, src: Writable, tmp_gpr1: Writable, tmp_gpr2: Writable, dst: Writable, ) -> Inst { + debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(src.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); @@ -771,7 +771,7 @@ impl Inst { dst, tmp_gpr1, tmp_gpr2, - to_f64, + dst_size, } } @@ -784,8 +784,8 @@ impl Inst { tmp_gpr: Writable, tmp_xmm: Writable, ) -> Inst { - debug_assert!(src_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(src.to_reg().get_class() == RegClass::V128); debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); @@ -810,8 +810,8 @@ impl Inst { tmp_gpr: Writable, tmp_xmm: Writable, ) -> Inst { - debug_assert!(src_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst_size.is_size(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(src.to_reg().get_class() == RegClass::V128); debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); @@ -833,7 +833,7 @@ impl Inst { lhs: Reg, rhs_dst: Writable, ) -> Inst { - debug_assert!(size.is_size(&[OperandSize::Size32, OperandSize::Size64])); + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert_eq!(lhs.get_class(), RegClass::V128); debug_assert_eq!(rhs_dst.to_reg().get_class(), RegClass::V128); Inst::XmmMinMaxSeq { @@ -849,14 +849,15 @@ impl Inst { src: RegMem, dst: Writable, imm: u8, - is64: bool, + size: OperandSize, ) -> Inst { + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); Inst::XmmRmRImm { op, src, dst, imm, - is64, + size, } } @@ -890,7 +891,7 @@ impl Inst { pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable) -> Inst { src.assert_regclass_is(RegClass::I64); match src { - RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst), + RegMem::Reg { reg } => Self::mov_r_r(OperandSize::Size64, reg, dst), RegMem::Mem { addr } => Self::mov64_m_r(addr, dst), } } @@ -969,7 +970,7 @@ impl Inst { } pub(crate) fn cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable) -> Inst { - debug_assert!(size.is_size(&[ + debug_assert!(size.is_one_of(&[ OperandSize::Size16, OperandSize::Size32, OperandSize::Size64 @@ -978,15 +979,11 @@ impl Inst { Inst::Cmove { size, cc, src, dst } } - pub(crate) fn xmm_cmove(is_64: bool, cc: CC, src: RegMem, dst: Writable) -> Inst { + pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable) -> Inst { + debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); src.assert_regclass_is(RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XmmCmove { - is_64, - cc, - src, - dst, - } + Inst::XmmCmove { size, cc, src, dst } } pub(crate) fn push64(src: RegMemImm) -> Inst { @@ -1187,12 +1184,20 @@ impl Inst { types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to), types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to), types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to), - types::F32X4 => { - Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false) - } - types::F64X2 => { - Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false) - } + types::F32X4 => Inst::xmm_rm_r_imm( + SseOpcode::Cmpps, + from, + to, + FcmpImm::Equal.encode(), + OperandSize::Size32, + ), + types::F64X2 => Inst::xmm_rm_r_imm( + SseOpcode::Cmppd, + from, + to, + FcmpImm::Equal.encode(), + OperandSize::Size32, + ), _ => unimplemented!("unimplemented type for Inst::equals: {}", ty), } } @@ -1257,34 +1262,30 @@ impl PrettyPrint for Inst { ljustify(s1 + &s2) } - fn suffix_lq(is_64: bool) -> String { - (if is_64 { "q" } else { "l" }).to_string() - } - - fn suffix_lqb(is_64: bool, is_8: bool) -> String { - match (is_64, is_8) { - (_, true) => "b".to_string(), - (true, false) => "q".to_string(), - (false, false) => "l".to_string(), + fn suffix_lq(size: OperandSize) -> String { + match size { + OperandSize::Size32 => "l", + OperandSize::Size64 => "q", + _ => unreachable!(), } + .to_string() } - fn size_lq(is_64: bool) -> u8 { - if is_64 { - 8 - } else { - 4 + fn suffix_lqb(size: OperandSize, is_8: bool) -> String { + match (size, is_8) { + (_, true) => "b", + (OperandSize::Size32, false) => "l", + (OperandSize::Size64, false) => "q", + _ => unreachable!(), } + .to_string() } - fn size_lqb(is_64: bool, is_8: bool) -> u8 { + fn size_lqb(size: OperandSize, is_8: bool) -> u8 { if is_8 { - 1 - } else if is_64 { - 8 - } else { - 4 + return 1; } + size.to_bytes() } fn suffix_bwlq(size: OperandSize) -> String { @@ -1299,16 +1300,11 @@ impl PrettyPrint for Inst { match self { Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), - Inst::AluRmiR { - is_64, - op, - src, - dst, - } => format!( + Inst::AluRmiR { size, op, src, dst } => format!( "{} {}, {}", - ljustify2(op.to_string(), suffix_lqb(*is_64, op.is_8bit())), - src.show_rru_sized(mb_rru, size_lqb(*is_64, op.is_8bit())), - show_ireg_sized(dst.to_reg(), mb_rru, size_lqb(*is_64, op.is_8bit())), + ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + src.show_rru_sized(mb_rru, size_lqb(*size, op.is_8bit())), + show_ireg_sized(dst.to_reg(), mb_rru, size_lqb(*size, op.is_8bit())), ), Inst::UnaryRmR { src, dst, op, size } => format!( @@ -1426,14 +1422,18 @@ impl PrettyPrint for Inst { src, dst, imm, - is64, + size, .. } => format!( "{} ${}, {}, {}", ljustify(format!( "{}{}", op.to_string(), - if *is64 { ".w" } else { "" } + if *size == OperandSize::Size64 { + ".w" + } else { + "" + } )), imm, src.show_rru(mb_rru), @@ -1483,12 +1483,16 @@ impl PrettyPrint for Inst { ), Inst::CvtUint64ToFloatSeq { - src, dst, to_f64, .. + src, dst, dst_size, .. } => format!( "{} {}, {}", ljustify(format!( "u64_to_{}_seq", - if *to_f64 { "f64" } else { "f32" } + if *dst_size == OperandSize::Size64 { + "f64" + } else { + "f32" + } )), show_ireg_sized(src.to_reg(), mb_rru, 8), dst.show_rru(mb_rru), @@ -1529,11 +1533,11 @@ impl PrettyPrint for Inst { ), Inst::Imm { - dst_is_64, + dst_size, simm64, dst, } => { - if *dst_is_64 { + if *dst_size == OperandSize::Size64 { format!( "{} ${}, {}", ljustify("movabsq".to_string()), @@ -1550,11 +1554,11 @@ impl PrettyPrint for Inst { } } - Inst::MovRR { is_64, src, dst } => format!( + Inst::MovRR { size, src, dst } => format!( "{} {}, {}", - ljustify2("mov".to_string(), suffix_lq(*is_64)), - show_ireg_sized(*src, mb_rru, size_lq(*is_64)), - show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)) + ljustify2("mov".to_string(), suffix_lq(*size)), + show_ireg_sized(*src, mb_rru, size.to_bytes()), + show_ireg_sized(dst.to_reg(), mb_rru, size.to_bytes()) ), Inst::MovzxRmR { @@ -1665,19 +1669,17 @@ impl PrettyPrint for Inst { show_ireg_sized(dst.to_reg(), mb_rru, size.to_bytes()) ), - Inst::XmmCmove { - is_64, - cc, - src, - dst, - } => { - let size = if *is_64 { 8 } else { 4 }; + Inst::XmmCmove { size, cc, src, dst } => { format!( "j{} $next; mov{} {}, {}; $next: ", cc.invert().to_string(), - if *is_64 { "sd" } else { "ss" }, - src.show_rru_sized(mb_rru, size), - show_ireg_sized(dst.to_reg(), mb_rru, size) + if *size == OperandSize::Size64 { + "sd" + } else { + "ss" + }, + src.show_rru_sized(mb_rru, size.to_bytes()), + show_ireg_sized(dst.to_reg(), mb_rru, size.to_bytes()) ) } @@ -2482,9 +2484,9 @@ impl MachInst for Inst { // out the upper 32 bits of the destination. For example, we could // conceivably use `movl %reg, %reg` to zero out the top 32 bits of // %reg. - Self::MovRR { - is_64, src, dst, .. - } if *is_64 => Some((*dst, *src)), + Self::MovRR { size, src, dst, .. } if *size == OperandSize::Size64 => { + Some((*dst, *src)) + } // Note as well that MOVS[S|D] when used in the `XmmUnaryRmR` context are pure moves of // scalar floating-point values (and annotate `dst` as `def`s to the register allocator) // whereas the same operation in a packed context, e.g. `XMM_RM_R`, is used to merge a @@ -2559,7 +2561,7 @@ impl MachInst for Inst { // If this isn't true, we have gone way off the rails. debug_assert!(rc_dst == rc_src); match rc_dst { - RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), + RegClass::I64 => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg), RegClass::V128 => { // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions", // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for @@ -2691,20 +2693,22 @@ impl MachInst for Inst { || ty == types::R32 || ty == types::R64 ); + // Immediates must be 32 or 64 bits. + // Smaller types are widened. + let size = match OperandSize::from_ty(ty) { + OperandSize::Size64 => OperandSize::Size64, + _ => OperandSize::Size32, + }; if value == 0 { ret.push(Inst::alu_rmi_r( - ty == types::I64, + size, AluRmiROpcode::Xor, RegMemImm::reg(to_reg.to_reg()), to_reg, )); } else { let value = value as u64; - ret.push(Inst::imm( - OperandSize::from_bytes(ty.bytes()), - value.into(), - to_reg, - )); + ret.push(Inst::imm(size, value.into(), to_reg)); } } } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind.rs b/cranelift/codegen/src/isa/x64/inst/unwind.rs index ffe43930f0..cc73bddce9 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind.rs @@ -1,6 +1,6 @@ use crate::isa::unwind::input::UnwindInfo; use crate::isa::x64::inst::{ - args::{AluRmiROpcode, Amode, RegMemImm, SyntheticAmode}, + args::{AluRmiROpcode, Amode, OperandSize, RegMemImm, SyntheticAmode}, regs, Inst, }; use crate::machinst::{UnwindInfoContext, UnwindInfoGenerator}; @@ -50,7 +50,7 @@ impl UnwindInfoGenerator for X64UnwindInfo { } } Inst::AluRmiR { - is_64: true, + size: OperandSize::Size64, op: AluRmiROpcode::Sub, src: RegMemImm::Imm { simm32 }, dst, @@ -75,7 +75,7 @@ impl UnwindInfoGenerator for X64UnwindInfo { )); } Inst::AluRmiR { - is_64: true, + size: OperandSize::Size64, op: AluRmiROpcode::Add, src: RegMemImm::Imm { simm32 }, dst, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 5e9a8e826d..c720b28a32 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -288,20 +288,26 @@ fn emit_insert_lane>( ty: Type, ) { if !ty.is_float() { - let (sse_op, is64) = match ty.lane_bits() { - 8 => (SseOpcode::Pinsrb, false), - 16 => (SseOpcode::Pinsrw, false), - 32 => (SseOpcode::Pinsrd, false), - 64 => (SseOpcode::Pinsrd, true), + let (sse_op, size) = match ty.lane_bits() { + 8 => (SseOpcode::Pinsrb, OperandSize::Size32), + 16 => (SseOpcode::Pinsrw, OperandSize::Size32), + 32 => (SseOpcode::Pinsrd, OperandSize::Size32), + 64 => (SseOpcode::Pinsrd, OperandSize::Size64), _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()), }; - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, size)); } else if ty == types::F32 { let sse_op = SseOpcode::Insertps; // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane // shifted into bits 5:6). let lane = 0b00_00_00_00 | lane << 4; - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false)); + ctx.emit(Inst::xmm_rm_r_imm( + sse_op, + src, + dst, + lane, + OperandSize::Size32, + )); } else if ty == types::F64 { let sse_op = match lane { // Move the lowest quadword in replacement to vector without changing @@ -330,15 +336,15 @@ fn emit_extract_lane>( ty: Type, ) { if !ty.is_float() { - let (sse_op, is64) = match ty.lane_bits() { - 8 => (SseOpcode::Pextrb, false), - 16 => (SseOpcode::Pextrw, false), - 32 => (SseOpcode::Pextrd, false), - 64 => (SseOpcode::Pextrd, true), + let (sse_op, size) = match ty.lane_bits() { + 8 => (SseOpcode::Pextrb, OperandSize::Size32), + 16 => (SseOpcode::Pextrw, OperandSize::Size32), + 32 => (SseOpcode::Pextrd, OperandSize::Size32), + 64 => (SseOpcode::Pextrd, OperandSize::Size64), _ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()), }; let src = RegMem::reg(src); - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64)); + ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, size)); } else if ty == types::F32 || ty == types::F64 { if lane == 0 { // Remove the extractlane instruction, leaving the float where it is. The upper @@ -366,7 +372,13 @@ fn emit_extract_lane>( _ => unreachable!(), }; let src = RegMem::reg(src); - ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false)); + ctx.emit(Inst::xmm_rm_r_imm( + sse_op, + src, + dst, + mask, + OperandSize::Size32, + )); } } else { panic!("unable to emit extractlane for type: {}", ty) @@ -404,13 +416,13 @@ fn emit_cmp>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntC ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); ctx.emit(Inst::setcc(CC::Z, cmp2)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cmp1.to_reg()), cmp2, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), cmp2, @@ -423,13 +435,13 @@ fn emit_cmp>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntC ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); ctx.emit(Inst::setcc(CC::NZ, cmp2)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(cmp1.to_reg()), cmp2, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), cmp2, @@ -453,19 +465,19 @@ fn emit_cmp>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntC ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); ctx.emit(Inst::setcc(CC::from_intcc(cc.unsigned()), cmp3)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cmp2.to_reg()), cmp3, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(cmp1.to_reg()), cmp3, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), cmp3, @@ -623,14 +635,14 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); // tmp1 = (src >> 1) & 0b0101.. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); // tmp2 = src & 0b0101.. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -645,7 +657,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, // tmp0 = (src >> 1) & 0b0101.. | (src & 0b0101..) << 1 ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -665,13 +677,13 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -684,7 +696,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -704,13 +716,13 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -723,7 +735,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -744,13 +756,13 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -763,7 +775,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -785,13 +797,13 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -804,7 +816,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -826,13 +838,13 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp2.to_reg()), tmp1, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(tmp0.to_reg()), tmp2, @@ -845,7 +857,7 @@ fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, )); ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp1.to_reg()), tmp0, @@ -915,7 +927,7 @@ fn emit_shl_i128>( ctx.emit(Inst::imm(OperandSize::Size64, 64, amt)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(amt_src), amt, @@ -935,14 +947,14 @@ fn emit_shl_i128>( )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp2.to_reg()), tmp3, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dst_lo.to_reg()), dst_lo, @@ -951,7 +963,7 @@ fn emit_shl_i128>( // register allocator happy, because it cannot otherwise // infer that cmovz + cmovnz always defines dst_hi. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dst_hi.to_reg()), dst_hi, @@ -959,7 +971,7 @@ fn emit_shl_i128>( ctx.emit(Inst::gen_move(amt, amt_src, types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(64), amt, @@ -1045,7 +1057,7 @@ fn emit_shr_i128>( ctx.emit(Inst::imm(OperandSize::Size64, 64, amt)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(amt_src), amt, @@ -1065,7 +1077,7 @@ fn emit_shr_i128>( )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp2.to_reg()), tmp3, @@ -1081,7 +1093,7 @@ fn emit_shr_i128>( )); } else { ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dst_hi.to_reg()), dst_hi, @@ -1091,7 +1103,7 @@ fn emit_shr_i128>( // register allocator happy, because it cannot otherwise // infer that cmovz + cmovnz always defines dst_lo. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dst_lo.to_reg()), dst_lo, @@ -1099,7 +1111,7 @@ fn emit_shr_i128>( ctx.emit(Inst::gen_move(amt, amt_src, types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(64), amt, @@ -1374,7 +1386,11 @@ fn emit_clz>( )); ctx.emit(Inst::alu_rmi_r( - ty == types::I64, + if ty == types::I64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }, AluRmiROpcode::Sub, RegMemImm::reg(tmp.to_reg()), dst, @@ -1654,13 +1670,13 @@ fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64)); ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[1], types::I64)); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, alu_ops.0, RegMemImm::reg(rhs.regs()[0]), dst.regs()[0], )); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, alu_ops.1, RegMemImm::reg(rhs.regs()[1]), dst.regs()[1], @@ -1684,27 +1700,27 @@ fn lower_insn_to_regs>( let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64)); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::reg(rhs.regs()[0]), dst.regs()[0], )); ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[0], types::I64)); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::reg(rhs.regs()[1]), dst.regs()[1], )); ctx.emit(Inst::gen_move(tmp, lhs.regs()[1], types::I64)); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::reg(rhs.regs()[0]), tmp, )); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp.to_reg()), dst.regs()[1], @@ -1720,14 +1736,18 @@ fn lower_insn_to_regs>( RegMem::reg(rhs.regs()[0]), )); ctx.emit(Inst::alu_rmi_r( - /* is_64 = */ true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(regs::rdx()), dst.regs()[1], )); } } else { - let is_64 = ty == types::I64; + let size = if ty == types::I64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }; let alu_op = match op { Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add, Opcode::Isub => AluRmiROpcode::Sub, @@ -1766,8 +1786,8 @@ fn lower_insn_to_regs>( }; let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::mov_r_r(true, lhs, dst)); - ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst)); + ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst)); + ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst)); } } @@ -1952,9 +1972,9 @@ fn lower_insn_to_regs>( }; let w_rcx = Writable::from_reg(regs::rcx()); - ctx.emit(Inst::mov_r_r(true, lhs, dst)); + ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst)); if count.is_none() { - ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx)); + ctx.emit(Inst::mov_r_r(OperandSize::Size64, rhs.unwrap(), w_rcx)); } ctx.emit(Inst::shift_r(size, shift_kind, count, dst)); } else if dst_ty == types::I128 { @@ -1983,7 +2003,7 @@ fn lower_insn_to_regs>( let inv_amt = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::imm(OperandSize::Size64, 128, inv_amt)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(amt_src), inv_amt, @@ -1996,13 +2016,13 @@ fn lower_insn_to_regs>( /* is_signed = */ false, ); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp.regs()[0].to_reg()), dst.regs()[0], )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp.regs()[1].to_reg()), dst.regs()[1], @@ -2019,20 +2039,20 @@ fn lower_insn_to_regs>( let inv_amt = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::imm(OperandSize::Size64, 128, inv_amt)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(amt_src), inv_amt, )); emit_shl_i128(ctx, src, dst, inv_amt.to_reg()); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp.regs()[0].to_reg()), dst.regs()[0], )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Or, RegMemImm::reg(tmp.regs()[1].to_reg()), dst.regs()[1], @@ -2187,12 +2207,16 @@ fn lower_insn_to_regs>( // register. RegMemImm::Reg { reg } => { let bigger_shift_by_gpr = ctx.alloc_tmp(shift_by_ty).only_reg().unwrap(); - ctx.emit(Inst::mov_r_r(true, reg, bigger_shift_by_gpr)); + ctx.emit(Inst::mov_r_r(OperandSize::Size64, reg, bigger_shift_by_gpr)); - let is_64 = shift_by_ty == types::I64; + let size = if shift_by_ty == types::I64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }; let imm = RegMemImm::imm(8); ctx.emit(Inst::alu_rmi_r( - is_64, + size, AluRmiROpcode::Add, imm, bigger_shift_by_gpr, @@ -2270,7 +2294,7 @@ fn lower_insn_to_regs>( } else { let dynamic_shift_by = put_input_in_reg(ctx, inputs[1]); let w_rcx = Writable::from_reg(regs::rcx()); - ctx.emit(Inst::mov_r_r(true, dynamic_shift_by, w_rcx)); + ctx.emit(Inst::mov_r_r(OperandSize::Size64, dynamic_shift_by, w_rcx)); ctx.emit(Inst::shift_r(OperandSize::Size64, kind, None, reg)); }; }; @@ -2410,7 +2434,7 @@ fn lower_insn_to_regs>( emit_clz(ctx, types::I64, types::I64, src_hi, tmp1); emit_clz(ctx, types::I64, types::I64, src_lo, dst); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(64), dst, @@ -2427,7 +2451,7 @@ fn lower_insn_to_regs>( dst, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dsts.regs()[1].to_reg()), dsts.regs()[1], @@ -2486,7 +2510,7 @@ fn lower_insn_to_regs>( emit_ctz(ctx, types::I64, types::I64, src_lo, dst); emit_ctz(ctx, types::I64, types::I64, src_hi, tmp1); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::imm(64), tmp1, @@ -2503,7 +2527,7 @@ fn lower_insn_to_regs>( dst, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dsts.regs()[1].to_reg()), dsts.regs()[1], @@ -2566,7 +2590,7 @@ fn lower_insn_to_regs>( tmp, )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp.to_reg()), dst, @@ -2574,7 +2598,7 @@ fn lower_insn_to_regs>( // Zero the result's high component. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dsts.regs()[1].to_reg()), dsts.regs()[1], @@ -2609,8 +2633,6 @@ fn lower_insn_to_regs>( let dst = ctx.alloc_tmp(types::I64).only_reg().unwrap(); dsts.push(dst.to_reg()); if ty == types::I64 { - let is_64 = true; - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let cst = ctx.alloc_tmp(types::I64).only_reg().unwrap(); @@ -2631,7 +2653,7 @@ fn lower_insn_to_regs>( // andq cst, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cst.to_reg()), tmp1, @@ -2642,7 +2664,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2658,7 +2680,7 @@ fn lower_insn_to_regs>( // and cst, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cst.to_reg()), tmp1, @@ -2666,7 +2688,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2682,7 +2704,7 @@ fn lower_insn_to_regs>( // and cst, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cst.to_reg()), tmp1, @@ -2690,7 +2712,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2709,7 +2731,7 @@ fn lower_insn_to_regs>( // add tmp2, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp2.to_reg()), dst, @@ -2720,7 +2742,7 @@ fn lower_insn_to_regs>( // and cst, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::And, RegMemImm::reg(cst.to_reg()), dst, @@ -2731,7 +2753,7 @@ fn lower_insn_to_regs>( // mul cst, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size64, AluRmiROpcode::Mul, RegMemImm::reg(cst.to_reg()), dst, @@ -2746,7 +2768,6 @@ fn lower_insn_to_regs>( )); } else { assert_eq!(ty, types::I32); - let is_64 = false; let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); @@ -2764,7 +2785,7 @@ fn lower_insn_to_regs>( // andq $0x7777_7777, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::imm(0x77777777), tmp1, @@ -2775,7 +2796,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2791,7 +2812,7 @@ fn lower_insn_to_regs>( // and 0x7777_7777, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::imm(0x77777777), tmp1, @@ -2799,7 +2820,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2815,7 +2836,7 @@ fn lower_insn_to_regs>( // and $0x7777_7777, tmp1 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::imm(0x77777777), tmp1, @@ -2823,7 +2844,7 @@ fn lower_insn_to_regs>( // sub tmp1, tmp2 ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::Sub, RegMemImm::reg(tmp1.to_reg()), tmp2, @@ -2842,7 +2863,7 @@ fn lower_insn_to_regs>( // add tmp2, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::reg(tmp2.to_reg()), dst, @@ -2850,7 +2871,7 @@ fn lower_insn_to_regs>( // and $0x0F0F_0F0F, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::imm(0x0F0F0F0F), dst, @@ -2858,7 +2879,7 @@ fn lower_insn_to_regs>( // mul $0x0101_0101, dst ctx.emit(Inst::alu_rmi_r( - is_64, + OperandSize::Size32, AluRmiROpcode::Mul, RegMemImm::imm(0x01010101), dst, @@ -2882,13 +2903,13 @@ fn lower_insn_to_regs>( let final_dst = get_output_reg(ctx, outputs[0]); ctx.emit(Inst::gen_move(final_dst.regs()[0], dsts[0], types::I64)); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(dsts[1]), final_dst.regs()[0], )); ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(final_dst.regs()[1].to_reg()), final_dst.regs()[1], @@ -2996,7 +3017,7 @@ fn lower_insn_to_regs>( } else { // Zero-extend: just zero the top word. ctx.emit(Inst::alu_rmi_r( - true, + OperandSize::Size64, AluRmiROpcode::Xor, RegMemImm::reg(dst.regs()[1].to_reg()), dst.regs()[1], @@ -3208,7 +3229,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::setcc(cc1, tmp)); ctx.emit(Inst::setcc(cc2, dst)); ctx.emit(Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::reg(tmp.to_reg()), dst, @@ -3219,7 +3240,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::setcc(cc1, tmp)); ctx.emit(Inst::setcc(cc2, dst)); ctx.emit(Inst::alu_rmi_r( - false, + OperandSize::Size32, AluRmiROpcode::Or, RegMemImm::reg(tmp.to_reg()), dst, @@ -3269,7 +3290,13 @@ fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(dst, lhs, input_ty)); // Emit the comparison. - ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false)); + ctx.emit(Inst::xmm_rm_r_imm( + op, + rhs, + dst, + imm.encode(), + OperandSize::Size32, + )); } } @@ -3382,7 +3409,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::setcc(cc1, tmp)); ctx.emit(Inst::setcc(cc2, tmp2)); ctx.emit(Inst::alu_rmi_r( - false, /* is_64 */ + OperandSize::Size32, AluRmiROpcode::And, RegMemImm::reg(tmp.to_reg()), tmp2, @@ -3600,7 +3627,7 @@ fn lower_insn_to_regs>( RegMem::reg(tmp_xmm1.to_reg()), dst, cond.encode(), - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); @@ -3683,7 +3710,7 @@ fn lower_insn_to_regs>( RegMem::reg(tmp_xmm1.to_reg()), dst, cond.encode(), - false, + OperandSize::Size32, )); // The dst register holds a mask for lanes containing NaNs. @@ -3828,7 +3855,11 @@ fn lower_insn_to_regs>( let tmp_gpr1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let tmp_gpr2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::cvt_u64_to_float_seq( - ty == types::F64, + if ty == types::F64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }, src_copy, tmp_gpr1, tmp_gpr2, @@ -3955,7 +3986,7 @@ fn lower_insn_to_regs>( RegMem::reg(tmp.to_reg()), tmp, cond.encode(), - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_rm_r( SseOpcode::Andps, @@ -4088,7 +4119,7 @@ fn lower_insn_to_regs>( RegMem::from(tmp1), tmp2, cond.encode(), - false, + OperandSize::Size32, )); // Convert those set of lanes that have the max_signed_int factored out. @@ -4141,7 +4172,7 @@ fn lower_insn_to_regs>( RegMem::reg(src), dst, 8, - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst)); } @@ -4152,7 +4183,7 @@ fn lower_insn_to_regs>( RegMem::reg(src), dst, 8, - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::from(dst), dst)); } @@ -4175,7 +4206,7 @@ fn lower_insn_to_regs>( RegMem::reg(src), dst, 8, - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst)); } @@ -4186,7 +4217,7 @@ fn lower_insn_to_regs>( RegMem::reg(src), dst, 8, - false, + OperandSize::Size32, )); ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::from(dst), dst)); } @@ -4336,7 +4367,7 @@ fn lower_insn_to_regs>( RegMem::reg(tmp.to_reg()), tmp, cond.encode(), - false, + OperandSize::Size32, ); ctx.emit(cmpps); @@ -4439,7 +4470,13 @@ fn lower_insn_to_regs>( }; let src = input_to_reg_mem(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::xmm_rm_r_imm(op, src, dst, mode.encode(), false)); + ctx.emit(Inst::xmm_rm_r_imm( + op, + src, + dst, + mode.encode(), + OperandSize::Size32, + )); } else { // Lower to VM calls when there's no access to SSE4.1. // Note, for vector types on platforms that don't support sse41 @@ -4888,6 +4925,11 @@ fn lower_insn_to_regs>( emit_moves(ctx, dst, rhs, ty); + let operand_size = if ty == types::F64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }; match fcmp_results { FcmpCondResult::Condition(cc) => { if is_int_or_ref_ty(ty) || ty == types::I128 || ty == types::B128 { @@ -4895,7 +4937,7 @@ fn lower_insn_to_regs>( emit_cmoves(ctx, size, cc, lhs, dst); } else { ctx.emit(Inst::xmm_cmove( - ty == types::F64, + operand_size, cc, RegMem::reg(lhs.only_reg().unwrap()), dst.only_reg().unwrap(), @@ -4915,13 +4957,13 @@ fn lower_insn_to_regs>( emit_cmoves(ctx, size, cc2, lhs, dst); } else { ctx.emit(Inst::xmm_cmove( - ty == types::F64, + operand_size, cc1, RegMem::reg(lhs.only_reg().unwrap()), dst.only_reg().unwrap(), )); ctx.emit(Inst::xmm_cmove( - ty == types::F64, + operand_size, cc2, RegMem::reg(lhs.only_reg().unwrap()), dst.only_reg().unwrap(), @@ -4972,7 +5014,11 @@ fn lower_insn_to_regs>( } else { debug_assert!(ty == types::F32 || ty == types::F64); ctx.emit(Inst::xmm_cmove( - ty == types::F64, + if ty == types::F64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }, cc, RegMem::reg(lhs.only_reg().unwrap()), dst.only_reg().unwrap(), @@ -5007,7 +5053,11 @@ fn lower_insn_to_regs>( debug_assert!(ty == types::F32 || ty == types::F64); emit_moves(ctx, dst, rhs, ty); ctx.emit(Inst::xmm_cmove( - ty == types::F64, + if ty == types::F64 { + OperandSize::Size64 + } else { + OperandSize::Size32 + }, cc, RegMem::reg(lhs.only_reg().unwrap()), dst.only_reg().unwrap(), @@ -5398,7 +5448,7 @@ fn lower_insn_to_regs>( RegMem::from(dst), dst, 0, - false, + OperandSize::Size32, )) } 32 => { @@ -5409,7 +5459,7 @@ fn lower_insn_to_regs>( RegMem::from(dst), dst, 0, - false, + OperandSize::Size32, )) } 64 => { @@ -5714,7 +5764,7 @@ impl LowerBackend for X64Backend { )); ctx.emit(Inst::setcc(half_cc, tmp2)); ctx.emit(Inst::alu_rmi_r( - false, + OperandSize::Size32, comb_op, RegMemImm::reg(tmp1.to_reg()), tmp2,