diff --git a/src/backend.rs b/src/backend.rs
index f2cb1401b9..e9648c273b 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -3230,10 +3230,267 @@ impl<'this, M: ModuleContext> Context<'this, M> {
         self.push(out);
     }
 
-    unop!(i32_clz, lzcnt, Rd, u32, u32::leading_zeros);
-    unop!(i64_clz, lzcnt, Rq, u64, |a: u64| a.leading_zeros() as u64);
-    unop!(i32_ctz, tzcnt, Rd, u32, u32::trailing_zeros);
-    unop!(i64_ctz, tzcnt, Rq, u64, |a: u64| a.trailing_zeros() as u64);
+    /// Emits code for `i32.clz`: pops the operand and pushes its count of
+    /// leading zero bits.
+    ///
+    /// Immediates are folded at compile time. Otherwise `lzcnt` is emitted when
+    /// the host CPU reports the feature; if not, a `bsr`-based sequence is used
+    /// that yields 32 for a zero operand.
+    pub fn i32_clz(&mut self) {
+        let val = self.pop();
+
+        let out_val = match val {
+            ValueLocation::Immediate(imm) => {
+                ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into())
+            }
+            ValueLocation::Stack(offset) => {
+                let offset = self.adjusted_offset(offset);
+                let temp = self.take_reg(I32).unwrap();
+
+                if is_x86_feature_detected!("lzcnt") {
+                    dynasm!(self.asm
+                        ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset]
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    let temp_2 = self.take_reg(I32).unwrap();
+
+                    // `bsr` sets ZF for a zero source, so `cmove` swaps in 0x3f;
+                    // the final `xor 0x1f` maps a bit index `i` to `31 - i` and
+                    // 0x3f to 32.
+                    dynasm!(self.asm
+                        ; bsr Rd(temp.rq().unwrap()), [rsp + offset]
+                        ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _
+                        ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
+                        ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _
+                        ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
+                    );
+                    self.free_value(ValueLocation::Reg(temp_2));
+                    ValueLocation::Reg(temp)
+                }
+            }
+            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
+                let reg = self.into_reg(GPRType::Rq, val).unwrap();
+                let temp = self.take_reg(I32).unwrap();
+
+                if is_x86_feature_detected!("lzcnt") {
+                    dynasm!(self.asm
+                        ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    // NOTE(review): `reg` is overwritten with the constants below;
+                    // confirm nothing else still reads that register.
+                    dynasm!(self.asm
+                        ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                        ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _
+                        ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                        ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _
+                        ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                }
+            }
+        };
+
+        self.free_value(val);
+        self.push(out_val);
+    }
+
+    /// Emits code for `i64.clz`: pops the operand and pushes its count of
+    /// leading zero bits.
+    ///
+    /// Immediates are folded at compile time. Otherwise `lzcnt` is emitted when
+    /// the host CPU reports the feature; if not, a `bsr`-based sequence is used
+    /// that yields 64 for a zero operand.
+    pub fn i64_clz(&mut self) {
+        let val = self.pop();
+
+        let out_val = match val {
+            ValueLocation::Immediate(imm) => {
+                ValueLocation::Immediate(imm.as_i64().unwrap().leading_zeros().into())
+            }
+            ValueLocation::Stack(offset) => {
+                let offset = self.adjusted_offset(offset);
+                let temp = self.take_reg(I64).unwrap();
+
+                if is_x86_feature_detected!("lzcnt") {
+                    dynasm!(self.asm
+                        ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset]
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    let temp_2 = self.take_reg(I64).unwrap();
+
+                    // `bsr` sets ZF for a zero source, so `cmove` swaps in 0x7f;
+                    // the final `xor 0x3f` maps a bit index `i` to `63 - i` and
+                    // 0x7f to 64.
+                    dynasm!(self.asm
+                        ; bsr Rq(temp.rq().unwrap()), [rsp + offset]
+                        ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _
+                        ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
+                        ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _
+                        ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
+                    );
+                    self.free_value(ValueLocation::Reg(temp_2));
+                    ValueLocation::Reg(temp)
+                }
+            }
+            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
+                let reg = self.into_reg(GPRType::Rq, val).unwrap();
+                let temp = self.take_reg(I64).unwrap();
+
+                if is_x86_feature_detected!("lzcnt") {
+                    dynasm!(self.asm
+                        ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    // NOTE(review): `reg` is overwritten with the constants below;
+                    // confirm nothing else still reads that register.
+                    dynasm!(self.asm
+                        ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                        ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _
+                        ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                        ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _
+                        ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                }
+            }
+        };
+
+        self.free_value(val);
+        self.push(out_val);
+    }
+
+    /// Emits code for `i32.ctz`: pops the operand and pushes its count of
+    /// trailing zero bits.
+    ///
+    /// Immediates are folded at compile time. Otherwise `tzcnt` is emitted when
+    /// the host CPU reports BMI1 (the extension `tzcnt` belongs to); if not, a
+    /// `bsf`-based sequence is used that yields 32 for a zero operand.
+    pub fn i32_ctz(&mut self) {
+        let val = self.pop();
+
+        let out_val = match val {
+            ValueLocation::Immediate(imm) => {
+                ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into())
+            }
+            ValueLocation::Stack(offset) => {
+                let offset = self.adjusted_offset(offset);
+                let temp = self.take_reg(I32).unwrap();
+
+                // `tzcnt` is a BMI1 instruction. Without BMI1 the same encoding
+                // runs as `bsf`, whose result is undefined for a zero source, so
+                // the check must not be keyed on `lzcnt`.
+                if is_x86_feature_detected!("bmi1") {
+                    dynasm!(self.asm
+                        ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset]
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    let temp_zero_val = self.take_reg(I32).unwrap();
+
+                    // `bsf` sets ZF for a zero source; `cmove` then supplies 32.
+                    dynasm!(self.asm
+                        ; bsf Rd(temp.rq().unwrap()), [rsp + offset]
+                        ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _
+                        ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap())
+                    );
+                    self.free_value(ValueLocation::Reg(temp_zero_val));
+                    ValueLocation::Reg(temp)
+                }
+            }
+            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
+                let reg = self.into_reg(GPRType::Rq, val).unwrap();
+                let temp = self.take_reg(I32).unwrap();
+
+                if is_x86_feature_detected!("bmi1") {
+                    dynasm!(self.asm
+                        ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    // NOTE(review): `reg` is overwritten with the constant below;
+                    // confirm nothing else still reads that register.
+                    dynasm!(self.asm
+                        ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                        ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _
+                        ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                }
+            }
+        };
+
+        self.free_value(val);
+        self.push(out_val);
+    }
+
+    /// Emits code for `i64.ctz`: pops the operand and pushes its count of
+    /// trailing zero bits.
+    ///
+    /// Immediates are folded at compile time. Otherwise `tzcnt` is emitted when
+    /// the host CPU reports BMI1 (the extension `tzcnt` belongs to); if not, a
+    /// `bsf`-based sequence is used that yields 64 for a zero operand.
+    pub fn i64_ctz(&mut self) {
+        let val = self.pop();
+
+        let out_val = match val {
+            ValueLocation::Immediate(imm) => {
+                ValueLocation::Immediate(imm.as_i64().unwrap().trailing_zeros().into())
+            }
+            ValueLocation::Stack(offset) => {
+                let offset = self.adjusted_offset(offset);
+                let temp = self.take_reg(I64).unwrap();
+
+                // `tzcnt` is a BMI1 instruction. Without BMI1 the same encoding
+                // runs as `bsf`, whose result is undefined for a zero source, so
+                // the check must not be keyed on `lzcnt`.
+                if is_x86_feature_detected!("bmi1") {
+                    dynasm!(self.asm
+                        ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset]
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    let temp_zero_val = self.take_reg(I64).unwrap();
+
+                    // `bsf` sets ZF for a zero source; `cmove` then supplies 64.
+                    dynasm!(self.asm
+                        ; bsf Rq(temp.rq().unwrap()), [rsp + offset]
+                        ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _
+                        ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap())
+                    );
+                    self.free_value(ValueLocation::Reg(temp_zero_val));
+                    ValueLocation::Reg(temp)
+                }
+            }
+            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
+                let reg = self.into_reg(GPRType::Rq, val).unwrap();
+                let temp = self.take_reg(I64).unwrap();
+
+                if is_x86_feature_detected!("bmi1") {
+                    dynasm!(self.asm
+                        ; tzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                } else {
+                    // NOTE(review): `reg` is overwritten with the constant below;
+                    // confirm nothing else still reads that register.
+                    dynasm!(self.asm
+                        ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                        ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _
+                        ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
+                    );
+                    ValueLocation::Reg(temp)
+                }
+            }
+        };
+
+        self.free_value(val);
+        self.push(out_val);
+    }
 
     pub fn i32_extend_u(&mut self) {
         let val = self.pop();
@@ -5272,4 +5529,3 @@ impl IntoLabel for (LabelValue, LabelValue) {
         Box::new(const_values(self.0, self.1))
     }
 }
-