From 5df88404836f66298e5cdce7565e3cb8a8e6805f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 11 Nov 2020 13:30:14 -0800 Subject: [PATCH] Add support for brff/brif and ifcmp_sp to new x64 backend to support Lucet. `lucetc` currently *almost*, but not quite, works with the new x64 backend; the only missing piece is support for the particular instructions emitted as part of its prologue stack-check. We do not normally see `brff`, `brif`, or `ifcmp_sp` in CLIF generated by `cranelift-wasm` without the old-backend legalization rules, so these were not supported in the new x64 backend as they were not necessary for Wasm MVP support. Using them resulted in an `unimplemented!()` panic. This PR adds support for `brff` and `brif` analogously to how AArch64 implements them, by pattern-matching the `ifcmp` / `ffcmp` directly. Then `ifcmp_sp` is a straightforward variant of `ifcmp`. Along the way, this also removes the notion of "fallthrough block" from the branch-group lowering method; instead, `fallthrough` instructions are handled as normal branches to their explicitly-provided targets, which (in the original CLIF) match the fallthrough block. The reason for this is that the block reordering done as part of lowering can change the fallthrough block. We were not using `fallthrough` instructions in the output produced by `cranelift-wasm`, so this, too, was not previously caught. With these changes, the `lucetc` crate in Lucet passes all tests with the `x64` feature-flag added to its `cranelift-codegen` dependency. 
--- cranelift/codegen/src/isa/aarch64/lower.rs | 3 +- .../codegen/src/isa/aarch64/lower_inst.rs | 11 ++- cranelift/codegen/src/isa/arm32/lower.rs | 3 +- cranelift/codegen/src/isa/arm32/lower_inst.rs | 8 +- cranelift/codegen/src/isa/x64/lower.rs | 72 +++++++++++++-- cranelift/codegen/src/machinst/lower.rs | 15 +--- .../filetests/filetests/isa/x64/branches.clif | 88 +++++++++++++++++++ 7 files changed, 167 insertions(+), 33 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/branches.clif diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index bcc5cbeaf0..1e7c63742b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1173,9 +1173,8 @@ impl LowerBackend for AArch64Backend { ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()> { - lower_inst::lower_branch(ctx, branches, targets, fallthrough) + lower_inst::lower_branch(ctx, branches, targets) } fn maybe_pinned_reg(&self) -> Option { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 957d15c57c..5b26a66f63 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3172,7 +3172,6 @@ pub(crate) fn lower_branch>( ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()> { // A block should end with at most two branches. The first may be a // conditional branch; a conditional branch can be followed only by an @@ -3189,11 +3188,11 @@ pub(crate) fn lower_branch>( assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); let taken = BranchTarget::Label(targets[0]); - let not_taken = match op1 { - Opcode::Jump => BranchTarget::Label(targets[1]), - Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()), - _ => unreachable!(), // assert above. 
- }; + // not_taken target is the target of the second branch, even if it is a Fallthrough + // instruction: because we reorder blocks while we lower, the fallthrough in the new + // order is not (necessarily) the same as the fallthrough in CLIF. So we use the + // explicitly-provided target. + let not_taken = BranchTarget::Label(targets[1]); match op0 { Opcode::Brz | Opcode::Brnz => { diff --git a/cranelift/codegen/src/isa/arm32/lower.rs b/cranelift/codegen/src/isa/arm32/lower.rs index 7c11ae95ba..c0305e762c 100644 --- a/cranelift/codegen/src/isa/arm32/lower.rs +++ b/cranelift/codegen/src/isa/arm32/lower.rs @@ -229,9 +229,8 @@ impl LowerBackend for Arm32Backend { ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()> { - lower_inst::lower_branch(ctx, branches, targets, fallthrough) + lower_inst::lower_branch(ctx, branches, targets) } fn maybe_pinned_reg(&self) -> Option { diff --git a/cranelift/codegen/src/isa/arm32/lower_inst.rs b/cranelift/codegen/src/isa/arm32/lower_inst.rs index 1cff717da3..6294f8d6f9 100644 --- a/cranelift/codegen/src/isa/arm32/lower_inst.rs +++ b/cranelift/codegen/src/isa/arm32/lower_inst.rs @@ -559,7 +559,6 @@ pub(crate) fn lower_branch>( ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()> { // A block should end with at most two branches. The first may be a // conditional branch; a conditional branch can be followed only by an @@ -576,11 +575,8 @@ pub(crate) fn lower_branch>( assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); let taken = BranchTarget::Label(targets[0]); - let not_taken = match op1 { - Opcode::Jump => BranchTarget::Label(targets[1]), - Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()), - _ => unreachable!(), // assert above. 
- }; + let not_taken = BranchTarget::Label(targets[1]); + match op0 { Opcode::Brz | Opcode::Brnz => { let rn = input_to_reg( diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 7e1c2d51ab..0a59255d9c 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3793,7 +3793,6 @@ impl LowerBackend for X64Backend { ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()> { // A block should end with at most two branches. The first may be a // conditional branch; a conditional branch can be followed only by an @@ -3816,11 +3815,11 @@ impl LowerBackend for X64Backend { assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); let taken = targets[0]; - let not_taken = match op1 { - Opcode::Jump => targets[1], - Opcode::Fallthrough => fallthrough.unwrap(), - _ => unreachable!(), // assert above. - }; + // not_taken target is the target of the second branch, even if it is a Fallthrough + // instruction: because we reorder blocks while we lower, the fallthrough in the new + // order is not (necessarily) the same as the fallthrough in CLIF. So we use the + // explicitly-provided target. 
+ let not_taken = targets[1]; match op0 { Opcode::Brz | Opcode::Brnz => { @@ -3913,6 +3912,67 @@ impl LowerBackend for X64Backend { } } + Opcode::Brif => { + let flag_input = InsnInput { + insn: branches[0], + input: 0, + }; + + if let Some(ifcmp) = matches_input(ctx, flag_input, Opcode::Ifcmp) { + emit_cmp(ctx, ifcmp); + let cond_code = ctx.data(branches[0]).cond_code().unwrap(); + let cc = CC::from_intcc(cond_code); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); + } else if let Some(ifcmp_sp) = matches_input(ctx, flag_input, Opcode::IfcmpSp) { + let operand = put_input_in_reg( + ctx, + InsnInput { + insn: ifcmp_sp, + input: 0, + }, + ); + let ty = ctx.input_ty(ifcmp_sp, 0); + ctx.emit(Inst::cmp_rmi_r( + ty.bytes() as u8, + RegMemImm::reg(operand), + regs::rsp(), + )); + let cond_code = ctx.data(branches[0]).cond_code().unwrap(); + let cc = CC::from_intcc(cond_code); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); + } else { + // Should be disallowed by flags checks in verifier. + unimplemented!("Brif with non-ifcmp input"); + } + } + Opcode::Brff => { + let flag_input = InsnInput { + insn: branches[0], + input: 0, + }; + + if let Some(ffcmp) = matches_input(ctx, flag_input, Opcode::Ffcmp) { + let cond_code = ctx.data(branches[0]).fp_cond_code().unwrap(); + match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { + FcmpCondResult::Condition(cc) => { + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); + } + FcmpCondResult::AndConditions(cc1, cc2) => { + ctx.emit(Inst::jmp_if(cc1.invert(), not_taken)); + ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken)); + } + FcmpCondResult::OrConditions(cc1, cc2) => { + ctx.emit(Inst::jmp_if(cc1, taken)); + ctx.emit(Inst::jmp_cond(cc2, taken, not_taken)); + } + FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), + } + } else { + // Should be disallowed by flags checks in verifier. 
+ unimplemented!("Brff with input not from ffcmp"); + } + } + _ => panic!("unexpected branch opcode: {:?}", op0), } } else { diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 531085ae4e..d457dcff0d 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -209,7 +209,6 @@ pub trait LowerBackend { ctx: &mut C, insts: &[Inst], targets: &[MachLabel], - fallthrough: Option, ) -> CodegenResult<()>; /// A bit of a hack: give a fixed register that always holds the result of a @@ -663,13 +662,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> { block: Block, branches: &SmallVec<[Inst; 2]>, targets: &SmallVec<[MachLabel; 2]>, - maybe_fallthrough: Option, ) -> CodegenResult<()> { debug!( - "lower_clif_branches: block {} branches {:?} targets {:?} maybe_fallthrough {:?}", - block, branches, targets, maybe_fallthrough + "lower_clif_branches: block {} branches {:?} targets {:?}", + block, branches, targets, ); - backend.lower_branch_group(self, branches, targets, maybe_fallthrough)?; + backend.lower_branch_group(self, branches, targets)?; let loc = self.srcloc(branches[0]); self.finish_ir_inst(loc); Ok(()) @@ -744,12 +742,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { if let Some(bb) = lb.orig_block() { self.collect_branches_and_targets(bindex, bb, &mut branches, &mut targets); if branches.len() > 0 { - let maybe_fallthrough = if (bindex + 1) < (lowered_order.len() as BlockIndex) { - Some(MachLabel::from_block(bindex + 1)) - } else { - None - }; - self.lower_clif_branches(backend, bb, &branches, &targets, maybe_fallthrough)?; + self.lower_clif_branches(backend, bb, &branches, &targets)?; self.finish_ir_inst(self.srcloc(branches[0])); } } else { diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif new file mode 100644 index 0000000000..79461ba2cb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ 
-0,0 +1,88 @@ +test compile +target x86_64 +feature "experimental_x64" + +function %f0(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + brnz v2, block1 + jump block2 + ; check: cmpl %esi, %edi + ; nextln: jz label1; j label2 + +block1: + v3 = iconst.i32 1 + ; check: movl $$1, %eax + return v3 + ; check: ret + +block2: + v4 = iconst.i32 2 + ; check: movl $$2, %eax + return v4 + ; check: ret +} + +function %f1(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + brz v2, block1 + jump block2 + ; check: cmpl %esi, %edi + ; nextln: jnz label1; j label2 + +block1: + v3 = iconst.i32 1 + ; check: movl $$1, %eax + return v3 + ; check: ret + +block2: + v4 = iconst.i32 2 + ; check: movl $$2, %eax + return v4 + ; check: ret +} + +function %f2(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ifcmp v0, v1 + brif eq v2, block1 + jump block2 + ; check: cmpl %esi, %edi + ; nextln: jz label1; j label2 + +block1: + v3 = iconst.i32 1 + ; check: movl $$1, %eax + return v3 + ; check: ret + +block2: + v4 = iconst.i32 2 + ; check: movl $$2, %eax + return v4 + ; check: ret +} + +function %f3(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = ffcmp v0, v1 + brff eq v2, block1 + jump block2 + ; check: ucomiss %xmm1, %xmm0 + ; nextln: jp label2 + ; nextln: jnz label2; j label1 + +block1: + v3 = iconst.i32 1 + ; check: movl $$1, %eax + return v3 + ; check: ret + +block2: + v4 = iconst.i32 2 + ; check: movl $$2, %eax + return v4 + ; check: ret +}