x64: Lower stack_addr, udiv, sdiv, urem, srem, umulhi, smulhi in ISLE (#4741)

Lower stack_addr, udiv, sdiv, urem, srem, umulhi, and smulhi in ISLE.

For udiv, sdiv, urem, and srem I opted to move the original lowering into an extern constructor, as the interactions with rax and rdx for the div instruction didn't seem meaningful to implement in ISLE. However, I'm happy to revisit this choice and move more of the embedding into ISLE.
This commit is contained in:
Trevor Elliott
2022-08-23 11:22:49 -07:00
committed by GitHub
parent 3b68d76905
commit b5f1ab7780
13 changed files with 585 additions and 159 deletions

View File

@@ -2738,7 +2738,7 @@
(rule (mul_hi ty signed src1 src2)
(let ((dst_lo WritableGpr (temp_writable_gpr))
(dst_hi WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
(_ Unit (emit (MInst.MulHi size
signed
src1
@@ -3587,6 +3587,25 @@
;; Bitcast a 64-bit GPR value into an XMM register via `movq`.
(rule (bitcast_gpr_to_xmm $I64 src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Materialize the address of `stack_slot` (plus `offset`) into a fresh
;; temporary GPR by emitting the ABI's stackslot-address instruction.
(decl stack_addr_impl (StackSlot Offset32) Gpr)
(rule (stack_addr_impl stack_slot offset)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
dst))
;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; External (Rust) constructor: the div/idiv sequence has fixed rax/rdx
;; register choreography, so the instructions are emitted imperatively
;; rather than being expressed as ISLE rules.
(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit)
(extern constructor emit_div_or_rem emit_div_or_rem)
;; Shared lowering for udiv/sdiv/urem/srem: allocate the destination GPR
;; and delegate all instruction emission to `emit_div_or_rem`.
(decl div_or_rem (DivOrRemKind Value Value) Gpr)
(rule (div_or_rem kind a @ (value_type ty) b)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit_div_or_rem kind ty dst a b)))
dst))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Gpr InstOutput output_gpr)

View File

@@ -1575,7 +1575,7 @@ impl fmt::Display for ShiftKind {
}
/// Which kind of division or remainder instruction this is.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
pub enum DivOrRemKind {
SignedDiv,
UnsignedDiv,

View File

@@ -49,6 +49,23 @@ impl Inst {
dst: WritableXmm::from_writable_reg(dst).unwrap(),
}
}
/// Build a widening one-operand multiply (`mul`/`imul`) instruction.
///
/// The hardware form implicitly takes its first source in `rax` and
/// writes the double-width product into `rax` (low half) and `rdx`
/// (high half); those fixed registers are encoded below.
fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
    // Only the 16/32/64-bit forms are handled by this constructor.
    debug_assert!(size.is_one_of(&[
        OperandSize::Size16,
        OperandSize::Size32,
        OperandSize::Size64
    ]));
    rhs.assert_regclass_is(RegClass::Int);
    let implicit_lo = Gpr::new(regs::rax()).unwrap();
    let implicit_hi = Gpr::new(regs::rdx()).unwrap();
    Inst::MulHi {
        size,
        signed,
        src1: implicit_lo,
        src2: GprMem::new(rhs).unwrap(),
        dst_lo: WritableGpr::from_reg(implicit_lo),
        dst_hi: WritableGpr::from_reg(implicit_hi),
    }
}
}
#[test]

View File

@@ -208,23 +208,6 @@ impl Inst {
}
}
/// Build a widening one-operand multiply (`mul`/`imul`): the first
/// source is implicitly `rax`, and the product lands in `rax` (low
/// half) and `rdx` (high half).
pub(crate) fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
// Only 16/32/64-bit operand sizes are supported by this constructor.
debug_assert!(size.is_one_of(&[
OperandSize::Size16,
OperandSize::Size32,
OperandSize::Size64
]));
rhs.assert_regclass_is(RegClass::Int);
Inst::MulHi {
size,
signed,
src1: Gpr::new(regs::rax()).unwrap(),
src2: GprMem::new(rhs).unwrap(),
dst_lo: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
dst_hi: WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
}
}
pub(crate) fn checked_div_or_rem_seq(
kind: DivOrRemKind,
size: OperandSize,

View File

@@ -3426,3 +3426,62 @@
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundZero)))
;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (stack_addr stack_slot offset))
(stack_addr_impl stack_slot offset))
;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; All four div/rem opcodes share `div_or_rem`, which defers to the
;; external `emit_div_or_rem` constructor for the rax/rdx sequence.
(rule (lower (udiv a @ (value_type ty) b))
(div_or_rem (DivOrRemKind.UnsignedDiv) a b))
;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (sdiv a @ (value_type ty) b))
(div_or_rem (DivOrRemKind.SignedDiv) a b))
;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (urem a @ (value_type ty) b))
(div_or_rem (DivOrRemKind.UnsignedRem) a b))
;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (srem a @ (value_type ty) b))
(div_or_rem (DivOrRemKind.SignedRem) a b))
;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `mul_hi` returns a (lo, hi) register pair; the high half of the
;; product — index 1 of the pair — is the `umulhi`/`smulhi` result.
(rule (lower (umulhi a @ (value_type $I16) b))
(let ((res ValueRegs (mul_hi $I16 $false a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))
(rule (lower (umulhi a @ (value_type $I32) b))
(let ((res ValueRegs (mul_hi $I32 $false a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))
(rule (lower (umulhi a @ (value_type $I64) b))
(let ((res ValueRegs (mul_hi $I64 $false a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))
;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (smulhi a @ (value_type $I16) b))
(let ((res ValueRegs (mul_hi $I16 $true a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))
(rule (lower (smulhi a @ (value_type $I32) b))
(let ((res ValueRegs (mul_hi $I32 $true a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))
(rule (lower (smulhi a @ (value_type $I64) b))
(let ((res ValueRegs (mul_hi $I64 $true a b))
(hi Gpr (value_regs_get_gpr res 1)))
hi))

View File

@@ -14,7 +14,6 @@ use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::{Flags, TlsModel};
use smallvec::SmallVec;
use std::convert::TryFrom;
use target_lexicon::Triple;
//=============================================================================
@@ -574,150 +573,19 @@ fn lower_insn_to_regs(
| Opcode::Ceil
| Opcode::Floor
| Opcode::Nearest
| Opcode::Trunc => {
| Opcode::Trunc
| Opcode::StackAddr
| Opcode::Udiv
| Opcode::Urem
| Opcode::Sdiv
| Opcode::Srem
| Opcode::Umulhi
| Opcode::Smulhi => {
implemented_in_isle(ctx);
}
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),
Opcode::StackAddr => {
let (stack_slot, offset) = match *ctx.data(insn) {
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset,
} => (stack_slot, offset),
_ => unreachable!(),
};
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let offset: i32 = offset.into();
let inst =
ctx.abi()
.sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
ctx.emit(inst);
}
Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
let kind = match op {
Opcode::Udiv => DivOrRemKind::UnsignedDiv,
Opcode::Sdiv => DivOrRemKind::SignedDiv,
Opcode::Urem => DivOrRemKind::UnsignedRem,
Opcode::Srem => DivOrRemKind::SignedRem,
_ => unreachable!(),
};
let is_div = kind.is_div();
let input_ty = ctx.input_ty(insn, 0);
let size = OperandSize::from_ty(input_ty);
let dividend = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rax()),
dividend,
input_ty,
));
// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
if flags.avoid_div_traps() || op == Opcode::Srem {
// A vcode meta-instruction is used to lower the inline checks, since they embed
// pc-relative offsets that must not change, thus requiring regalloc to not
// interfere by introducing spills and reloads.
//
// Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
// regalloc is aware of the coalescing opportunity between rax/rdx and the
// destination register.
let divisor = put_input_in_reg(ctx, inputs[1]);
let divisor_copy = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64));
let tmp = if op == Opcode::Sdiv && size == OperandSize::Size64 {
Some(ctx.alloc_tmp(types::I64).only_reg().unwrap())
} else {
None
};
// TODO use xor
ctx.emit(Inst::imm(
OperandSize::Size32,
0,
Writable::from_reg(regs::rdx()),
));
ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp));
} else {
// We don't want more than one trap record for a single instruction,
// so let's not allow the "mem" case (load-op merging) here; force
// divisor into a register instead.
let divisor = RegMem::reg(put_input_in_reg(ctx, inputs[1]));
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
// signed opcodes.
ctx.emit(Inst::sign_extend_data(size));
} else if input_ty == types::I8 {
ctx.emit(Inst::movzx_rm_r(
ExtMode::BL,
RegMem::reg(regs::rax()),
Writable::from_reg(regs::rax()),
));
} else {
// zero for unsigned opcodes.
ctx.emit(Inst::imm(
OperandSize::Size64,
0,
Writable::from_reg(regs::rdx()),
));
}
// Emit the actual idiv.
ctx.emit(Inst::div(size, kind.is_signed(), divisor));
}
// Move the result back into the destination reg.
if is_div {
// The quotient is in rax.
ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
} else {
if size == OperandSize::Size8 {
// The remainder is in AH. Right-shift by 8 bits then move from rax.
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(8),
Writable::from_reg(regs::rax()),
));
ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
} else {
// The remainder is in rdx.
ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
}
}
}
Opcode::Umulhi | Opcode::Smulhi => {
let input_ty = ctx.input_ty(insn, 0);
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// Move lhs in %rax.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rax()),
lhs,
input_ty,
));
// Emit the actual mul or imul.
let signed = op == Opcode::Smulhi;
ctx.emit(Inst::mul_hi(OperandSize::from_ty(input_ty), signed, rhs));
// Read the result from the high part (stored in %rdx).
ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
}
Opcode::GetPinnedReg => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64));

View File

@@ -848,6 +848,108 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
self.lower_ctx
.use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
}
/// Emit the x64 lowering for integer `udiv`/`sdiv`/`urem`/`srem`.
///
/// The `div`/`idiv` instruction implicitly takes its dividend in
/// rdx:rax and leaves the quotient in rax and the remainder in rdx, so
/// this sequence: moves `dividend` into rax, fills in the high half
/// (sign/zero extension), emits either the checked divide
/// meta-instruction or a plain divide, and finally moves the
/// appropriate implicit result register into `dst`.
fn emit_div_or_rem(
&mut self,
kind: &DivOrRemKind,
ty: Type,
dst: WritableGpr,
dividend: Gpr,
divisor: Gpr,
) {
let is_div = kind.is_div();
let size = OperandSize::from_ty(ty);
// div/idiv implicitly read the dividend's low half from rax.
self.lower_ctx.emit(MInst::gen_move(
Writable::from_reg(regs::rax()),
dividend.to_reg(),
ty,
));
// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
// A vcode meta-instruction is used to lower the inline checks, since they embed
// pc-relative offsets that must not change, thus requiring regalloc to not
// interfere by introducing spills and reloads.
//
// Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
// regalloc is aware of the coalescing opportunity between rax/rdx and the
// destination register.
let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
self.lower_ctx
.emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64));
// The checked sequence takes an extra scratch register only for
// 64-bit signed division.
let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
} else {
None
};
// TODO use xor
self.lower_ctx.emit(MInst::imm(
OperandSize::Size32,
0,
Writable::from_reg(regs::rdx()),
));
self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
kind.clone(),
size,
divisor_copy,
tmp,
));
} else {
// We don't want more than one trap record for a single instruction,
// so let's not allow the "mem" case (load-op merging) here; force
// divisor into a register instead.
let divisor = RegMem::reg(divisor.to_reg());
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
// signed opcodes.
self.lower_ctx.emit(MInst::sign_extend_data(size));
} else if ty == types::I8 {
// Unsigned 8-bit divide consumes all of ax; zero-extend al into eax.
self.lower_ctx.emit(MInst::movzx_rm_r(
ExtMode::BL,
RegMem::reg(regs::rax()),
Writable::from_reg(regs::rax()),
));
} else {
// zero for unsigned opcodes.
self.lower_ctx.emit(MInst::imm(
OperandSize::Size64,
0,
Writable::from_reg(regs::rdx()),
));
}
// Emit the actual idiv.
self.lower_ctx
.emit(MInst::div(size, kind.is_signed(), divisor));
}
// Move the result back into the destination reg.
if is_div {
// The quotient is in rax.
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
} else {
if size == OperandSize::Size8 {
// The remainder is in AH. Right-shift by 8 bits then move from rax.
self.lower_ctx.emit(MInst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(8),
Writable::from_reg(regs::rax()),
));
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
} else {
// The remainder is in rdx.
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty));
}
}
}
}
impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {

View File

@@ -0,0 +1,67 @@
test compile precise-output
target x86_64
function %f1(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = sdiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; cbw %al, %dl
; idiv %al, (none), %sil, %al, %dl
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = sdiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; cwd %ax, %dx
; idiv %ax, %dx, %si, %ax, %dx
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sdiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; cdq %eax, %edx
; idiv %eax, %edx, %esi, %eax, %edx
; movq %rbp, %rsp
; popq %rbp
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sdiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; cqo %rax, %rdx
; idiv %rax, %rdx, %rsi, %rax, %rdx
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,51 @@
test compile precise-output
target x86_64
function %f1(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = smulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; imul %ax, %si, %ax, %dx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = smulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; imul %eax, %esi, %eax, %edx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = smulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; imul %rax, %rsi, %rax, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,71 @@
test compile precise-output
target x86_64
function %f1(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = srem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
; shrq $8, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = srem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,67 @@
test compile precise-output
target x86_64
function %f1(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = udiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movzbl %al, %eax
; div %al, (none), %sil, %al, %dl
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = udiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %ax, %dx, %si, %ax, %dx
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = udiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %eax, %edx, %esi, %eax, %edx
; movq %rbp, %rsp
; popq %rbp
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = udiv v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %rax, %rdx, %rsi, %rax, %rdx
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,51 @@
test compile precise-output
target x86_64
function %f1(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = umulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; mul %ax, %si, %ax, %dx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = umulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; mul %eax, %esi, %eax, %edx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = umulhi v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; mul %rax, %rsi, %rax, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,71 @@
test compile precise-output
target x86_64
function %f1(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = urem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movzbl %al, %eax
; div %al, (none), %sil, %al, %dl
; shrq $8, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = urem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %ax, %dx, %si, %ax, %dx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = urem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %eax, %edx, %esi, %eax, %edx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = urem v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movl $0, %edx
; div %rax, %rdx, %rsi, %rax, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret