From d8dbabfe6b6c8bc41f3406fa58f927f8d21f2f29 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 30 Nov 2022 14:44:59 -0800 Subject: [PATCH] Don't reuse registers in the x64 div lowering (#5356) Introduce a temporary for an intermediate value in the lowering of div in the x64 backend. Additionally, add a src argument to the shift_r smart constructor, which is why the diff got larger than just the div lowering. --- cranelift/codegen/src/isa/x64/inst/emit.rs | 1 + .../codegen/src/isa/x64/inst/emit_tests.rs | 30 +++++++++++++++++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 3 +- cranelift/codegen/src/isa/x64/lower/isle.rs | 9 +++--- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 73e25ca470..6c22bb0453 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2344,6 +2344,7 @@ pub(crate) fn emit( OperandSize::Size64, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 1 }).unwrap(), + tmp_gpr1, Writable::from_reg(tmp_gpr1), ); inst.emit(&[], sink, info, state); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 4249846a00..3428b31cf4 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2893,6 +2893,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "D3E7", @@ -2903,6 +2904,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + r12, w_r12, ), "41D3E4", @@ -2913,6 +2915,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "41C1E002", @@ -2923,6 +2926,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), + r13, w_r13, ), "41C1E51F", @@ -2933,6 +2937,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + r13, w_r13, ), "49D3E5", @@ -2943,6 +2948,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "48D3E7", @@ -2953,6 +2959,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "49C1E002", @@ -2963,6 +2970,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + rbx, w_rbx, ), "48C1E303", @@ -2973,6 +2981,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), + r13, w_r13, ), "49C1E53F", @@ -2983,6 +2992,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "D3EF", @@ -2993,6 +3003,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "41C1E802", @@ -3003,6 +3014,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), + r13, w_r13, ), "41C1ED1F", @@ -3013,6 +3025,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "48D3EF", @@ -3023,6 +3036,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "49C1E802", @@ -3033,6 +3047,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), + r13, w_r13, ), "49C1ED3F", @@ -3043,6 +3058,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "D3FF", @@ -3053,6 +3069,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "41C1F802", @@ -3063,6 +3080,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), + r13, w_r13, ), "41C1FD1F", @@ -3073,6 +3091,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rdi, w_rdi, ), "48D3FF", @@ -3083,6 +3102,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + r8, w_r8, ), "49C1F802", @@ -3093,6 +3113,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::ShiftRightArithmetic, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), + r13, w_r13, ), "49C1FD3F", @@ -3103,6 +3124,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::RotateLeft, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + r8, w_r8, ), "49D3C0", @@ -3113,6 +3135,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::RotateLeft, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + r9, w_r9, ), "41C1C103", @@ -3123,6 +3146,7 @@ fn test_x64_emit() { OperandSize::Size32, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rsi, w_rsi, ), "D3CE", @@ -3133,6 +3157,7 @@ fn test_x64_emit() { OperandSize::Size64, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + r15, w_r15, ), "49C1CF05", @@ -3143,6 +3168,7 @@ fn test_x64_emit() { OperandSize::Size8, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rsi, w_rsi, ), "40D2CE", @@ -3153,6 +3179,7 @@ fn test_x64_emit() { OperandSize::Size8, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rax, w_rax, ), "D2C8", @@ -3163,6 +3190,7 @@ fn test_x64_emit() { OperandSize::Size8, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + r15, w_r15, ), "41C0CF05", @@ -3173,6 +3201,7 @@ fn test_x64_emit() { OperandSize::Size16, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + rsi, w_rsi, ), "66D3CE", @@ -3183,6 +3212,7 @@ fn test_x64_emit() { OperandSize::Size16, ShiftKind::RotateRight, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + r15, w_r15, ), "6641C1CF05", diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 4704f809bf..f9984c87b1 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -431,6 +431,7 @@ impl Inst { size: OperandSize, kind: ShiftKind, num_bits: Imm8Gpr, + src: Reg, dst: Writable, ) -> Inst { if let Imm8Reg::Imm8 { imm: num_bits } = num_bits.clone().to_imm8_reg() { @@ -440,7 +441,7 @@ impl Inst { Inst::ShiftR { size, kind, - src: Gpr::new(dst.to_reg()).unwrap(), + src: Gpr::new(src).unwrap(), num_bits, dst: WritableGpr::from_writable_reg(dst).unwrap(), } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 53a7de61a5..1fa9d652a4 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -980,18 +980,17 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { )); } else { if size == OperandSize::Size8 { + let tmp = self.temp_writable_reg(ty); // The remainder is in AH. Right-shift by 8 bits then move from rax. self.lower_ctx.emit(MInst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(), - dst_quotient, - )); - self.lower_ctx.emit(MInst::gen_move( - dst.to_writable_reg(), dst_quotient.to_reg(), - ty, + tmp, )); + self.lower_ctx + .emit(MInst::gen_move(dst.to_writable_reg(), tmp.to_reg(), ty)); } else { // The remainder is in rdx. self.lower_ctx.emit(MInst::gen_move(