From dbd2241b6066060c359318e945098998656c704e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 4 Jan 2021 16:42:24 -0800 Subject: [PATCH] x64: handle tests of b1 values correctly (only LSB is defined). Previously, `select` and `brz`/`brnz` instructions, when given a `b1` boolean argument, would test whether that boolean argument was nonzero, rather than whether its LSB was nonzero. Since our invariant for mapping CLIF state to machine state is that bits beyond the width of a value are undefined, the proper lowering is to test only the LSB. (aarch64 does not have the same issue because its `Extend` pseudoinst already properly handles masking of b1 values when a zero-extend is requested, as it is for select/brz/brnz.) Found by Nathan Ringo on Zulip [1] (thanks!). [1] https://bytecodealliance.zulipchat.com/#narrow/stream/217117-cranelift/topic/bnot.20on.20b1s --- cranelift/codegen/src/isa/x64/inst/args.rs | 8 ++ cranelift/codegen/src/isa/x64/inst/emit.rs | 45 +++++++--- .../codegen/src/isa/x64/inst/emit_tests.rs | 82 +++++++++++++++++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 52 ++++++++++-- .../src/isa/x64/inst/unwind/systemv.rs | 2 +- cranelift/codegen/src/isa/x64/lower.rs | 38 +++++++-- cranelift/filetests/filetests/isa/x64/b1.clif | 73 +++++++++++++++++ 7 files changed, 271 insertions(+), 29 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/b1.clif diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 4c61954630..680d0921ff 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -397,6 +397,14 @@ impl fmt::Display for UnaryRmROpcode { } } +#[derive(Clone, Copy, PartialEq)] +pub enum CmpOpcode { + /// CMP instruction: compute `a - b` and set flags from result. + Cmp, + /// TEST instruction: compute `a & b` and set flags from result. + Test, +} + pub(crate) enum InstructionSet { SSE, SSE2, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index fb32635a92..0bd8af840f 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1311,7 +1311,13 @@ pub(crate) fn emit( size, src: src_e, dst: reg_g, + opcode, } => { + let is_cmp = match opcode { + CmpOpcode::Cmp => true, + CmpOpcode::Test => false, + }; + let mut prefix = LegacyPrefixes::None; if *size == 2 { prefix = LegacyPrefixes::_66; @@ -1342,16 +1348,26 @@ pub(crate) fn emit( } } - // Use the swapped operands encoding, to stay consistent with the output of + // Use the swapped operands encoding for CMP, to stay consistent with the output of // gcc/llvm. - let opcode = if *size == 1 { 0x38 } else { 0x39 }; + let opcode = match (*size, is_cmp) { + (1, true) => 0x38, + (_, true) => 0x39, + (1, false) => 0x84, + (_, false) => 0x85, + }; emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex); } RegMemImm::Mem { addr } => { let addr = &addr.finalize(state, sink); - // Whereas here we revert to the "normal" G-E ordering. - let opcode = if *size == 1 { 0x3A } else { 0x3B }; + // Whereas here we revert to the "normal" G-E ordering for CMP. + let opcode = match (*size, is_cmp) { + (1, true) => 0x3A, + (_, true) => 0x3B, + (1, false) => 0x84, + (_, false) => 0x85, + }; emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex); } @@ -1361,16 +1377,25 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(*simm32); // And also here we use the "normal" G-E ordering. - let opcode = if *size == 1 { - 0x80 - } else if use_imm8 { - 0x83 + let opcode = if is_cmp { + if *size == 1 { + 0x80 + } else if use_imm8 { + 0x83 + } else { + 0x81 + } } else { - 0x81 + if *size == 1 { + 0xF6 + } else { + 0xF7 + } }; + let subopcode = if is_cmp { 7 } else { 0 }; let enc_g = int_reg_enc(*reg_g); - emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex); + emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32); } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e2afa80e2a..48e831b2d8 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2648,6 +2648,88 @@ fn test_x64_emit() { "cmpb %r13b, %r14b", )); + // ======================================================== + // TestRmiR + insns.push(( + Inst::test_rmi_r(8, RegMemImm::reg(r15), rdx), + "4C85FA", + "testq %r15, %rdx", + )); + insns.push(( + Inst::test_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "48855763", + "testq 99(%rdi), %rdx", + )); + insns.push(( + Inst::test_rmi_r(8, RegMemImm::imm(76543210), rdx), + "48F7C2EAF48F04", + "testq $76543210, %rdx", + )); + // + insns.push(( + Inst::test_rmi_r(4, RegMemImm::reg(r15), rdx), + "4485FA", + "testl %r15d, %edx", + )); + insns.push(( + Inst::test_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "855763", + "testl 99(%rdi), %edx", + )); + insns.push(( + Inst::test_rmi_r(4, RegMemImm::imm(76543210), rdx), + "F7C2EAF48F04", + "testl $76543210, %edx", + )); + // + insns.push(( + Inst::test_rmi_r(2, RegMemImm::reg(r15), rdx), + "664485FA", + "testw %r15w, %dx", + )); + insns.push(( + Inst::test_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "66855763", + "testw 99(%rdi), %dx", + )); + insns.push(( + Inst::test_rmi_r(2, RegMemImm::imm(23210), rdx), + "66F7C2AA5A", + "testw $23210, %dx", + )); + // + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(r15), rdx), + "4484FA", + "testb %r15b, %dl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx), + "845763", + "testb 99(%rdi), %dl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::imm(70), rdx), + "F6C246", + "testb $70, %dl", + )); + // Extra byte-cases (paranoia!) for test_rmi_r for first operand = R + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rax), rbx), + "84C3", + "testb %al, %bl", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rcx), rsi), + "4084CE", + "testb %cl, %sil", + )); + insns.push(( + Inst::test_rmi_r(1, RegMemImm::reg(rcx), r10), + "4184CA", + "testb %cl, %r10b", + )); + // ======================================================== // SetCC insns.push((Inst::setcc(CC::O, w_rsi), "400F90C6", "seto %sil")); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 806e8f276e..0f0866c813 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -168,9 +168,10 @@ pub enum Inst { dst: Writable, }, - /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg. + /// Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg. CmpRmiR { size: u8, // 1, 2, 4 or 8 + opcode: CmpOpcode, src: RegMemImm, dst: Reg, }, @@ -913,8 +914,30 @@ impl Inst { ) -> Inst { src.assert_regclass_is(RegClass::I64); debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); - debug_assert!(dst.get_class() == RegClass::I64); - Inst::CmpRmiR { size, src, dst } + debug_assert_eq!(dst.get_class(), RegClass::I64); + Inst::CmpRmiR { + size, + src, + dst, + opcode: CmpOpcode::Cmp, + } + } + + /// Does a comparison of dst & src for operands of size `size`. + pub(crate) fn test_rmi_r( + size: u8, // 1, 2, 4 or 8 + src: RegMemImm, + dst: Reg, + ) -> Inst { + src.assert_regclass_is(RegClass::I64); + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + debug_assert_eq!(dst.get_class(), RegClass::I64); + Inst::CmpRmiR { + size, + src, + dst, + opcode: CmpOpcode::Test, + } } pub(crate) fn trap(trap_code: TrapCode) -> Inst { @@ -1597,12 +1620,23 @@ impl PrettyPrint for Inst { dst.to_reg().show_rru(mb_rru) ), - Inst::CmpRmiR { size, src, dst } => format!( - "{} {}, {}", - ljustify2("cmp".to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, *size), - show_ireg_sized(*dst, mb_rru, *size) - ), + Inst::CmpRmiR { + size, + src, + dst, + opcode, + } => { + let op = match opcode { + CmpOpcode::Cmp => "cmp", + CmpOpcode::Test => "test", + }; + format!( + "{} {}, {}", + ljustify2(op.to_string(), suffix_bwlq(*size)), + src.show_rru_sized(mb_rru, *size), + show_ireg_sized(*dst, mb_rru, *size) + ) + } Inst::Setcc { cc, dst } => format!( "{} {}", diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 68473a8afb..e89b8a24ff 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -175,7 +175,7 @@ mod tests { _ => panic!("expected unwind information"), }; - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }"); + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (15, RememberState), (17, RestoreState)] }"); } fn create_multi_return_function(call_conv: CallConv) -> Function { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 6e6198c44b..9299fce738 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -248,12 +248,6 @@ fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option }) } -fn input_to_sext_imm>(ctx: &mut C, spec: InsnInput) -> Option { - let input = ctx.get_input_as_source_or_const(spec.insn, spec.input); - let input_ty = ctx.input_ty(spec.insn, spec.input); - non_reg_input_to_sext_imm(input, input_ty) -} - fn input_to_imm>(ctx: &mut C, spec: InsnInput) -> Option { ctx.get_input_as_source_or_const(spec.insn, spec.input) .constant @@ -3731,10 +3725,25 @@ fn lower_insn_to_regs>( let cond_code = ctx.data(icmp).cond_code().unwrap(); CC::from_intcc(cond_code) } else { - // The input is a boolean value, compare it against zero. + let sel_ty = ctx.input_ty(insn, 0); let size = ctx.input_ty(insn, 0).bytes() as u8; let test = put_input_in_reg(ctx, flag_input); - ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test)); + let test_input = if sel_ty == types::B1 { + // The input is a boolean value; test the LSB for nonzero with: + // test reg, 1 + RegMemImm::imm(1) + } else { + // The input is an integer; test the whole value for + // nonzero with: + // test reg, reg + // + // (It doesn't make sense to have a boolean wider than + // one bit here -- which bit would cause us to select an + // input?) + assert!(!is_bool_ty(sel_ty)); + RegMemImm::reg(test) + }; + ctx.emit(Inst::test_rmi_r(size, test_input, test)); CC::NZ }; @@ -4355,7 +4364,18 @@ impl LowerBackend for X64Backend { _ => unreachable!(), }; let size_bytes = src_ty.bytes() as u8; - ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src)); + // See case for `Opcode::Select` above re: testing the + // boolean input. + let test_input = if src_ty == types::B1 { + // test src, 1 + RegMemImm::imm(1) + } else { + assert!(!is_bool_ty(src_ty)); + // test src, src + RegMemImm::reg(src) + }; + + ctx.emit(Inst::test_rmi_r(size_bytes, test_input, src)); ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } else { unimplemented!("brz/brnz with non-int type {:?}", src_ty); diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif new file mode 100644 index 0000000000..7b65fa4e55 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -0,0 +1,73 @@ +test compile +target x86_64 +feature "experimental_x64" + +function %f0(b1, i32, i32) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1, v1: i32, v2: i32): + v3 = select.i32 v0, v1, v2 +; nextln: testb $$1, %dil +; nextln: cmovnzl %esi, %edx + + return v3 +; nextln: movq %rdx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} + +function %f1(b1) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1): + brnz v0, block1 + jump block2 +; nextln: testb $$1, %dil +; nextln: jnz label1; j label2 + +block1: + v1 = iconst.i32 1 + return v1 +; check: movl $$1, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +block2: + v2 = iconst.i32 2 + return v2 +; check: movl $$2, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} + +function %f2(b1) -> i32 { +; check: pushq %rbp +; nextln: movq %rsp, %rbp + +block0(v0: b1): + brz v0, block1 + jump block2 +; nextln: testb $$1, %dil +; nextln: jz label1; j label2 + +block1: + v1 = iconst.i32 1 + return v1 +; check: movl $$1, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +block2: + v2 = iconst.i32 2 + return v2 +; check: movl $$2, %eax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +}