Merge pull request #3506 from fitzgen/isle
Initial ISLE integration for x64
This commit is contained in:
1635
cranelift/codegen/src/clif.isle
Normal file
1635
cranelift/codegen/src/clif.isle
Normal file
File diff suppressed because it is too large
Load Diff
977
cranelift/codegen/src/isa/x64/inst.isle
Normal file
977
cranelift/codegen/src/isa/x64/inst.isle
Normal file
@@ -0,0 +1,977 @@
|
||||
;; Extern type definitions and constructors for the x64 `MachInst` type.
|
||||
|
||||
;;;; `MInst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type MInst extern
|
||||
(enum (Nop (len u8))
|
||||
(AluRmiR (size OperandSize)
|
||||
(op AluRmiROpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMemImm)
|
||||
(dst WritableReg))
|
||||
(MulHi (size OperandSize)
|
||||
(signed bool)
|
||||
(src1 Reg)
|
||||
(src2 RegMem)
|
||||
(dst_lo WritableReg)
|
||||
(dst_hi WritableReg))
|
||||
(XmmRmR (op SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmUnaryRmR (op SseOpcode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmRmiReg (opcode SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMemImm)
|
||||
(dst WritableReg))
|
||||
(XmmRmRImm (op SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMem)
|
||||
(dst WritableReg)
|
||||
(imm u8)
|
||||
(size OperandSize))
|
||||
(CmpRmiR (size OperandSize)
|
||||
(opcode CmpOpcode)
|
||||
(src RegMemImm)
|
||||
(dst Reg))
|
||||
(Imm (dst_size OperandSize)
|
||||
(simm64 u64)
|
||||
(dst WritableReg))
|
||||
(ShiftR (size OperandSize)
|
||||
(kind ShiftKind)
|
||||
(src Reg)
|
||||
(num_bits Imm8Reg)
|
||||
(dst WritableReg))
|
||||
(MovzxRmR (ext_mode ExtMode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(MovsxRmR (ext_mode ExtMode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(Cmove (size OperandSize)
|
||||
(cc CC)
|
||||
(consequent RegMem)
|
||||
(alternative Reg)
|
||||
(dst WritableReg))
|
||||
(XmmRmREvex (op Avx512Opcode)
|
||||
(src1 RegMem)
|
||||
(src2 Reg)
|
||||
(dst WritableReg))))
|
||||
|
||||
(type OperandSize extern
|
||||
(enum Size8
|
||||
Size16
|
||||
Size32
|
||||
Size64))
|
||||
|
||||
;; Get the `OperandSize` for a given `Type`.
|
||||
(decl operand_size_of_type (Type) OperandSize)
|
||||
(extern constructor operand_size_of_type operand_size_of_type)
|
||||
|
||||
;; Get the bit width of an `OperandSize`.
|
||||
(decl operand_size_bits (OperandSize) u16)
|
||||
(rule (operand_size_bits (OperandSize.Size8)) 8)
|
||||
(rule (operand_size_bits (OperandSize.Size16)) 16)
|
||||
(rule (operand_size_bits (OperandSize.Size32)) 32)
|
||||
(rule (operand_size_bits (OperandSize.Size64)) 64)
|
||||
|
||||
(type AluRmiROpcode extern
|
||||
(enum Add
|
||||
Adc
|
||||
Sub
|
||||
Sbb
|
||||
And
|
||||
Or
|
||||
Xor
|
||||
Mul
|
||||
And8
|
||||
Or8))
|
||||
|
||||
(type SseOpcode extern
|
||||
(enum Addps
|
||||
Addpd
|
||||
Addss
|
||||
Addsd
|
||||
Andps
|
||||
Andpd
|
||||
Andnps
|
||||
Andnpd
|
||||
Blendvpd
|
||||
Blendvps
|
||||
Comiss
|
||||
Comisd
|
||||
Cmpps
|
||||
Cmppd
|
||||
Cmpss
|
||||
Cmpsd
|
||||
Cvtdq2ps
|
||||
Cvtdq2pd
|
||||
Cvtpd2ps
|
||||
Cvtps2pd
|
||||
Cvtsd2ss
|
||||
Cvtsd2si
|
||||
Cvtsi2ss
|
||||
Cvtsi2sd
|
||||
Cvtss2si
|
||||
Cvtss2sd
|
||||
Cvttpd2dq
|
||||
Cvttps2dq
|
||||
Cvttss2si
|
||||
Cvttsd2si
|
||||
Divps
|
||||
Divpd
|
||||
Divss
|
||||
Divsd
|
||||
Insertps
|
||||
Maxps
|
||||
Maxpd
|
||||
Maxss
|
||||
Maxsd
|
||||
Minps
|
||||
Minpd
|
||||
Minss
|
||||
Minsd
|
||||
Movaps
|
||||
Movapd
|
||||
Movd
|
||||
Movdqa
|
||||
Movdqu
|
||||
Movlhps
|
||||
Movmskps
|
||||
Movmskpd
|
||||
Movq
|
||||
Movss
|
||||
Movsd
|
||||
Movups
|
||||
Movupd
|
||||
Mulps
|
||||
Mulpd
|
||||
Mulss
|
||||
Mulsd
|
||||
Orps
|
||||
Orpd
|
||||
Pabsb
|
||||
Pabsw
|
||||
Pabsd
|
||||
Packssdw
|
||||
Packsswb
|
||||
Packusdw
|
||||
Packuswb
|
||||
Paddb
|
||||
Paddd
|
||||
Paddq
|
||||
Paddw
|
||||
Paddsb
|
||||
Paddsw
|
||||
Paddusb
|
||||
Paddusw
|
||||
Palignr
|
||||
Pand
|
||||
Pandn
|
||||
Pavgb
|
||||
Pavgw
|
||||
Pblendvb
|
||||
Pcmpeqb
|
||||
Pcmpeqw
|
||||
Pcmpeqd
|
||||
Pcmpeqq
|
||||
Pcmpgtb
|
||||
Pcmpgtw
|
||||
Pcmpgtd
|
||||
Pcmpgtq
|
||||
Pextrb
|
||||
Pextrw
|
||||
Pextrd
|
||||
Pinsrb
|
||||
Pinsrw
|
||||
Pinsrd
|
||||
Pmaddubsw
|
||||
Pmaddwd
|
||||
Pmaxsb
|
||||
Pmaxsw
|
||||
Pmaxsd
|
||||
Pmaxub
|
||||
Pmaxuw
|
||||
Pmaxud
|
||||
Pminsb
|
||||
Pminsw
|
||||
Pminsd
|
||||
Pminub
|
||||
Pminuw
|
||||
Pminud
|
||||
Pmovmskb
|
||||
Pmovsxbd
|
||||
Pmovsxbw
|
||||
Pmovsxbq
|
||||
Pmovsxwd
|
||||
Pmovsxwq
|
||||
Pmovsxdq
|
||||
Pmovzxbd
|
||||
Pmovzxbw
|
||||
Pmovzxbq
|
||||
Pmovzxwd
|
||||
Pmovzxwq
|
||||
Pmovzxdq
|
||||
Pmuldq
|
||||
Pmulhw
|
||||
Pmulhuw
|
||||
Pmulhrsw
|
||||
Pmulld
|
||||
Pmullw
|
||||
Pmuludq
|
||||
Por
|
||||
Pshufb
|
||||
Pshufd
|
||||
Psllw
|
||||
Pslld
|
||||
Psllq
|
||||
Psraw
|
||||
Psrad
|
||||
Psrlw
|
||||
Psrld
|
||||
Psrlq
|
||||
Psubb
|
||||
Psubd
|
||||
Psubq
|
||||
Psubw
|
||||
Psubsb
|
||||
Psubsw
|
||||
Psubusb
|
||||
Psubusw
|
||||
Ptest
|
||||
Punpckhbw
|
||||
Punpckhwd
|
||||
Punpcklbw
|
||||
Punpcklwd
|
||||
Pxor
|
||||
Rcpss
|
||||
Roundps
|
||||
Roundpd
|
||||
Roundss
|
||||
Roundsd
|
||||
Rsqrtss
|
||||
Shufps
|
||||
Sqrtps
|
||||
Sqrtpd
|
||||
Sqrtss
|
||||
Sqrtsd
|
||||
Subps
|
||||
Subpd
|
||||
Subss
|
||||
Subsd
|
||||
Ucomiss
|
||||
Ucomisd
|
||||
Unpcklps
|
||||
Xorps
|
||||
Xorpd))
|
||||
|
||||
(type CmpOpcode extern
|
||||
(enum Cmp
|
||||
Test))
|
||||
|
||||
(type RegMemImm extern
|
||||
(enum
|
||||
(Reg (reg Reg))
|
||||
(Mem (addr SyntheticAmode))
|
||||
(Imm (simm32 u32))))
|
||||
|
||||
(type RegMem extern
|
||||
(enum
|
||||
(Reg (reg Reg))
|
||||
(Mem (addr SyntheticAmode))))
|
||||
|
||||
;; Put the given clif value into a `RegMem` operand.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg_mem (Value) RegMem)
|
||||
(extern constructor put_in_reg_mem put_in_reg_mem)
|
||||
|
||||
(type SyntheticAmode extern (enum))
|
||||
|
||||
(type ShiftKind extern
|
||||
(enum ShiftLeft
|
||||
ShiftRightLogical
|
||||
ShiftRightArithmetic
|
||||
RotateLeft
|
||||
RotateRight))
|
||||
|
||||
(type Imm8Reg extern
|
||||
(enum (Imm8 (imm u8))
|
||||
(Reg (reg Reg))))
|
||||
|
||||
(type CC extern
|
||||
(enum O
|
||||
NO
|
||||
B
|
||||
NB
|
||||
Z
|
||||
NZ
|
||||
BE
|
||||
NBE
|
||||
S
|
||||
NS
|
||||
L
|
||||
NL
|
||||
LE
|
||||
NLE
|
||||
P
|
||||
NP))
|
||||
|
||||
(type Avx512Opcode extern
|
||||
(enum Vcvtudq2ps
|
||||
Vpabsq
|
||||
Vpermi2b
|
||||
Vpmullq
|
||||
Vpopcntb))
|
||||
|
||||
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avx512vl_enabled () Type)
|
||||
(extern extractor avx512vl_enabled avx512vl_enabled)
|
||||
|
||||
(decl avx512dq_enabled () Type)
|
||||
(extern extractor avx512dq_enabled avx512dq_enabled)
|
||||
|
||||
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
|
||||
(decl imm8_from_value (Imm8Reg) Value)
|
||||
(extern extractor imm8_from_value imm8_from_value)
|
||||
|
||||
;; Extract a constant `RegMemImm.Imm` from a value operand.
|
||||
(decl simm32_from_value (RegMemImm) Value)
|
||||
(extern extractor simm32_from_value simm32_from_value)
|
||||
|
||||
;; Extract a constant `RegMemImm.Imm` from an `Imm64` immediate.
|
||||
(decl simm32_from_imm64 (RegMemImm) Imm64)
|
||||
(extern extractor simm32_from_imm64 simm32_from_imm64)
|
||||
|
||||
;; A load that can be sunk into another operation.
|
||||
(type SinkableLoad extern (enum))
|
||||
|
||||
;; Extract a `SinkableLoad` that works with `RegMemImm.Mem` from a value
|
||||
;; operand.
|
||||
(decl sinkable_load (SinkableLoad) Value)
|
||||
(extern extractor sinkable_load sinkable_load)
|
||||
|
||||
;; Sink a `SinkableLoad` into a `RegMemImm.Mem`.
|
||||
;;
|
||||
;; This is a side-effectful operation that notifies the context that the
|
||||
;; instruction that produced the `SinkableLoad` has been sunk into another
|
||||
;; instruction, and no longer needs to be lowered.
|
||||
(decl sink_load (SinkableLoad) RegMemImm)
|
||||
(extern constructor sink_load sink_load)
|
||||
|
||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||
;; effect on flags.
|
||||
(type ProducesFlags (enum (ProducesFlags (inst MInst) (result Reg))))
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
||||
(type ConsumesFlags (enum (ConsumesFlags (inst MInst) (result Reg))))
|
||||
|
||||
;; Pair a flags-producing instruction with the instruction that consumes those
;; flags. The two are emitted back-to-back so that no other instruction can be
;; emitted between them and clobber the flags.
;;
;; The resulting `ValueRegs` holds the producer's result register first and the
;; consumer's result register second.
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
(rule (with_flags (ProducesFlags.ProducesFlags flags_inst flags_result)
                  (ConsumesFlags.ConsumesFlags user_inst user_result))
      (let ((_producer Unit (emit flags_inst))
            (_consumer Unit (emit user_inst)))
        (value_regs flags_result user_result)))
|
||||
|
||||
;; Variant of `with_flags` that discards the producer's result register and
;; yields only the consumer's result. Both instructions are still emitted,
;; producer first.
(decl with_flags_1 (ProducesFlags ConsumesFlags) Reg)
(rule (with_flags_1 (ProducesFlags.ProducesFlags flags_inst _flags_result)
                    (ConsumesFlags.ConsumesFlags user_inst user_result))
      (let ((_producer Unit (emit flags_inst))
            (_consumer Unit (emit user_inst)))
        user_result))
|
||||
|
||||
;; Variant of `with_flags` for one producer whose flags feed two consumers.
;; The instructions are emitted in the order producer, first consumer, second
;; consumer. The producer's own result register is not returned: the
;; `ValueRegs` holds the first consumer's result and then the second
;; consumer's result.
(decl with_flags_2 (ProducesFlags ConsumesFlags ConsumesFlags) ValueRegs)
(rule (with_flags_2 (ProducesFlags.ProducesFlags flags_inst flags_result)
                    (ConsumesFlags.ConsumesFlags user_inst_a user_result_a)
                    (ConsumesFlags.ConsumesFlags user_inst_b user_result_b))
      (let ((_producer Unit (emit flags_inst))
            (_first Unit (emit user_inst_a))
            (_second Unit (emit user_inst_b)))
        (value_regs user_result_a user_result_b)))
|
||||
|
||||
;;;; Helpers for Sign/Zero Extending ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type ExtendKind (enum Sign Zero))
|
||||
|
||||
(type ExtMode extern (enum BL BQ WL WQ LQ))
|
||||
|
||||
;; `ExtMode::new`
|
||||
(decl ext_mode (u16 u16) ExtMode)
|
||||
(extern constructor ext_mode ext_mode)
|
||||
|
||||
;; Place `val` in a register, sign- or zero-extended (per the `ExtendKind`) to
;; the requested type.
(decl extend_to_reg (Value Type ExtendKind) Reg)

;; Fast path: the value already has the requested type, so it is simply put in
;; a register and no extension instruction is emitted.
(rule (extend_to_reg (and val (value_type ty)) =ty _kind)
      (put_in_reg val))

;; General case: build the `ExtMode` from the source and destination bit widths
;; and delegate to `extend`.
(rule (extend_to_reg (and val (value_type src_ty)) dst_ty kind)
      (let ((src_bits u16 (ty_bits src_ty))
            ;; Go through `operand_size_of_type` so that we clamp the output
            ;; to 32- or 64-bit width types.
            (dst_bits u16 (operand_size_bits (operand_size_of_type dst_ty))))
        (extend kind
                dst_ty
                (ext_mode src_bits dst_bits)
                (put_in_reg_mem val))))
|
||||
|
||||
;; Extend a `RegMem` operand into a fresh register, dispatching on the
;; `ExtendKind` to pick the instruction.
(decl extend (ExtendKind Type ExtMode RegMem) Reg)

;; `ExtendKind.Zero` lowers to `movzx`.
(rule (extend (ExtendKind.Zero) dst_ty ext operand)
      (movzx dst_ty ext operand))

;; `ExtendKind.Sign` lowers to `movsx`.
(rule (extend (ExtendKind.Sign) dst_ty ext operand)
      (movsx dst_ty ext operand))
|
||||
|
||||
;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; These constructors create SSA-style `MInst`s. It is their responsibility to
|
||||
;; maintain the invariant that each temporary register they allocate and define
|
||||
;; is defined exactly once.
|
||||
|
||||
;; Emit an instruction.
|
||||
;;
|
||||
;; This is low-level and side-effectful; it should only be used as an
|
||||
;; implementation detail by helpers that preserve the SSA facade themselves.
|
||||
(decl emit (MInst) Unit)
|
||||
(extern constructor emit emit)
|
||||
|
||||
;; Emit an `MInst.AluRmiR` that writes into a freshly allocated temporary of
;; type `ty`, and return that temporary as a plain `Reg`. The operand size is
;; derived from `ty` via `operand_size_of_type`.
(decl alu_rmi_r (Type AluRmiROpcode Reg RegMemImm) Reg)
(rule (alu_rmi_r ty op lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty))
            (_ Unit (emit (MInst.AluRmiR width op lhs rhs out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; Emit an `add`: `alu_rmi_r` with the `AluRmiROpcode.Add` opcode.
(decl add (Type Reg RegMemImm) Reg)
(rule (add ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Add) lhs rhs))
|
||||
|
||||
;; Build (but do not emit) an `add` whose flags are consumed by a following
;; instruction. The instruction is wrapped in `ProducesFlags` together with its
;; destination register; emission happens in the `with_flags*` helpers.
(decl add_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (add_with_flags ty lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ProducesFlags.ProducesFlags
          (MInst.AluRmiR width (AluRmiROpcode.Add) lhs rhs out)
          (writable_reg_to_reg out))))
|
||||
|
||||
;; Build (but do not emit) an `adc`. Because it reads flags set by an earlier
;; instruction it is wrapped in `ConsumesFlags` together with its destination
;; register.
(decl adc (Type Reg RegMemImm) ConsumesFlags)
(rule (adc ty lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ConsumesFlags.ConsumesFlags
          (MInst.AluRmiR width (AluRmiROpcode.Adc) lhs rhs out)
          (writable_reg_to_reg out))))
|
||||
|
||||
;; Emit a `sub`: `alu_rmi_r` with the `AluRmiROpcode.Sub` opcode.
(decl sub (Type Reg RegMemImm) Reg)
(rule (sub ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Sub) lhs rhs))
|
||||
|
||||
;; Build (but do not emit) a `sub` whose flags are consumed by a following
;; instruction. The instruction is wrapped in `ProducesFlags` together with its
;; destination register.
(decl sub_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (sub_with_flags ty lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ProducesFlags.ProducesFlags
          (MInst.AluRmiR width (AluRmiROpcode.Sub) lhs rhs out)
          (writable_reg_to_reg out))))
|
||||
|
||||
;; Build (but do not emit) an `sbb`. Because it reads flags set by an earlier
;; instruction it is wrapped in `ConsumesFlags` together with its destination
;; register.
(decl sbb (Type Reg RegMemImm) ConsumesFlags)
(rule (sbb ty lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ConsumesFlags.ConsumesFlags
          (MInst.AluRmiR width (AluRmiROpcode.Sbb) lhs rhs out)
          (writable_reg_to_reg out))))
|
||||
|
||||
;; Emit a `mul`: `alu_rmi_r` with the `AluRmiROpcode.Mul` opcode.
(decl mul (Type Reg RegMemImm) Reg)
(rule (mul ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Mul) lhs rhs))
|
||||
|
||||
;; Emit an `and`: `alu_rmi_r` with the `AluRmiROpcode.And` opcode.
;;
;; The `m_` prefix (short for "mach inst") keeps this term from clashing with
;; the ISLE-builtin `and` operator.
(decl m_and (Type Reg RegMemImm) Reg)
(rule (m_and ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.And) lhs rhs))
|
||||
|
||||
;; Emit an `or`: `alu_rmi_r` with the `AluRmiROpcode.Or` opcode.
(decl or (Type Reg RegMemImm) Reg)
(rule (or ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Or) lhs rhs))
|
||||
|
||||
;; Emit an `xor`: `alu_rmi_r` with the `AluRmiROpcode.Xor` opcode.
(decl xor (Type Reg RegMemImm) Reg)
(rule (xor ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Xor) lhs rhs))
|
||||
|
||||
;; Materialize an immediate in a fresh temporary register of type `ty`.
;;
;; NOTE(review): every rule below emits a general-purpose instruction
;; (`MInst.Imm` or `MInst.AluRmiR`) — presumably callers only pass integer
;; types; confirm.
(decl imm (Type u64) Reg)

;; General case: a single `MInst.Imm` sized according to `ty`.
(rule (imm ty bits)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty))
            (_ Unit (emit (MInst.Imm width bits out))))
        (writable_reg_to_reg out)))

;; External extractor used by the 32-bit special case below.
(decl nonzero_u64_fits_in_u32 (u64) u64)
(extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32)

;; Special case: a 64-bit immediate that fits into 32 bits. A 32-bit move
;; zero-extends the value and has a smaller encoding.
(rule (imm $I64 (nonzero_u64_fits_in_u32 low_bits))
      (let ((out WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.Imm (OperandSize.Size32) low_bits out))))
        (writable_reg_to_reg out)))

;; Special case: zero is produced with `xor r, r` instead of a move.
(rule (imm ty 0)
      (let ((zero_w WritableReg (temp_writable_reg ty))
            (zero Reg (writable_reg_to_reg zero_w))
            (width OperandSize (operand_size_of_type ty))
            (_ Unit (emit (MInst.AluRmiR width
                                         (AluRmiROpcode.Xor)
                                         zero
                                         (RegMemImm.Reg zero)
                                         zero_w))))
        zero))
|
||||
|
||||
;; Emit an `MInst.ShiftR` of the given `ShiftKind` into a fresh temporary of
;; type `ty` and return that temporary. The shift amount is an `Imm8Reg`:
;; either an 8-bit immediate or a register.
(decl shift_r (Type ShiftKind Reg Imm8Reg) Reg)
(rule (shift_r ty kind operand amount)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty))
            (_ Unit (emit (MInst.ShiftR width kind operand amount out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; Emit a rotate-left: `shift_r` with `ShiftKind.RotateLeft`.
;;
;; The `m_` prefix (short for "mach inst") distinguishes this term from clif's
;; `rotl`.
(decl m_rotl (Type Reg Imm8Reg) Reg)
(rule (m_rotl ty operand amount)
      (shift_r ty (ShiftKind.RotateLeft) operand amount))
|
||||
|
||||
;; Emit a shift-left: `shift_r` with `ShiftKind.ShiftLeft`.
(decl shl (Type Reg Imm8Reg) Reg)
(rule (shl ty operand amount)
      (shift_r ty (ShiftKind.ShiftLeft) operand amount))
|
||||
|
||||
;; Emit a logical shift-right: `shift_r` with `ShiftKind.ShiftRightLogical`.
(decl shr (Type Reg Imm8Reg) Reg)
(rule (shr ty operand amount)
      (shift_r ty (ShiftKind.ShiftRightLogical) operand amount))
|
||||
|
||||
;; Emit an arithmetic shift-right: `shift_r` with
;; `ShiftKind.ShiftRightArithmetic`.
(decl sar (Type Reg Imm8Reg) Reg)
(rule (sar ty operand amount)
      (shift_r ty (ShiftKind.ShiftRightArithmetic) operand amount))
|
||||
|
||||
;; Build (but do not emit) an `MInst.CmpRmiR` wrapped in `ProducesFlags`.
;;
;; The instruction has no destination operand, so the result register slot of
;; the `ProducesFlags` is filled with `(invalid_reg)`.
(decl cmp_rmi_r (OperandSize CmpOpcode RegMemImm Reg) ProducesFlags)
(rule (cmp_rmi_r width op lhs rhs)
      (ProducesFlags.ProducesFlags (MInst.CmpRmiR width op lhs rhs)
                                   (invalid_reg)))
|
||||
|
||||
;; Build a `cmp`: `cmp_rmi_r` with the `CmpOpcode.Cmp` opcode.
(decl cmp (OperandSize RegMemImm Reg) ProducesFlags)
(rule (cmp width lhs rhs)
      (cmp_rmi_r width (CmpOpcode.Cmp) lhs rhs))
|
||||
|
||||
;; Build a `test`: `cmp_rmi_r` with the `CmpOpcode.Test` opcode.
(decl test (OperandSize RegMemImm Reg) ProducesFlags)
(rule (test width lhs rhs)
      (cmp_rmi_r width (CmpOpcode.Test) lhs rhs))
|
||||
|
||||
;; Build (but do not emit) an `MInst.Cmove` on condition code `cc`, writing a
;; fresh temporary of type `ty`. Because it reads flags it is wrapped in
;; `ConsumesFlags` together with that temporary.
(decl cmove (Type CC RegMem Reg) ConsumesFlags)
(rule (cmove ty cc if_true if_false)
      (let ((out WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ConsumesFlags.ConsumesFlags
          (MInst.Cmove width cc if_true if_false out)
          (writable_reg_to_reg out))))
|
||||
|
||||
;; Emit an `MInst.MovzxRmR` with the given `ExtMode` into a fresh temporary of
;; type `ty` and return that temporary.
(decl movzx (Type ExtMode RegMem) Reg)
(rule (movzx ty ext operand)
      (let ((out WritableReg (temp_writable_reg ty))
            (_ Unit (emit (MInst.MovzxRmR ext operand out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; Emit an `MInst.MovsxRmR` with the given `ExtMode` into a fresh temporary of
;; type `ty` and return that temporary.
(decl movsx (Type ExtMode RegMem) Reg)
(rule (movsx ty ext operand)
      (let ((out WritableReg (temp_writable_reg ty))
            (_ Unit (emit (MInst.MovsxRmR ext operand out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; Emit an `MInst.XmmRmR` with the given SSE opcode into a fresh temporary of
;; type `ty` and return that temporary. `ty` is used only to allocate the
;; temporary; it is not part of the emitted instruction.
(decl xmm_rm_r (Type SseOpcode Reg RegMem) Reg)
(rule (xmm_rm_r ty opcode lhs rhs)
      (let ((out WritableReg (temp_writable_reg ty))
            (_ Unit (emit (MInst.XmmRmR opcode lhs rhs out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; Packed integer addition helpers. Each one forwards to `xmm_rm_r` with the
;; matching `SseOpcode` and the vector type used for the result temporary.

;; `paddb`: result temporary typed `$I8X16`.
(decl paddb (Reg RegMem) Reg)
(rule (paddb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddb) lhs rhs))

;; `paddw`: result temporary typed `$I16X8`.
(decl paddw (Reg RegMem) Reg)
(rule (paddw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddw) lhs rhs))

;; `paddd`: result temporary typed `$I32X4`.
(decl paddd (Reg RegMem) Reg)
(rule (paddd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Paddd) lhs rhs))

;; `paddq`: result temporary typed `$I64X2`.
(decl paddq (Reg RegMem) Reg)
(rule (paddq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Paddq) lhs rhs))

;; `paddsb`: result temporary typed `$I8X16`.
(decl paddsb (Reg RegMem) Reg)
(rule (paddsb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddsb) lhs rhs))

;; `paddsw`: result temporary typed `$I16X8`.
(decl paddsw (Reg RegMem) Reg)
(rule (paddsw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddsw) lhs rhs))

;; `paddusb`: result temporary typed `$I8X16`.
(decl paddusb (Reg RegMem) Reg)
(rule (paddusb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddusb) lhs rhs))

;; `paddusw`: result temporary typed `$I16X8`.
(decl paddusw (Reg RegMem) Reg)
(rule (paddusw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddusw) lhs rhs))
|
||||
|
||||
;; Packed integer subtraction helpers. Each one forwards to `xmm_rm_r` with the
;; matching `SseOpcode` and the vector type used for the result temporary.

;; `psubb`: result temporary typed `$I8X16`.
(decl psubb (Reg RegMem) Reg)
(rule (psubb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubb) lhs rhs))

;; `psubw`: result temporary typed `$I16X8`.
(decl psubw (Reg RegMem) Reg)
(rule (psubw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubw) lhs rhs))

;; `psubd`: result temporary typed `$I32X4`.
(decl psubd (Reg RegMem) Reg)
(rule (psubd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Psubd) lhs rhs))

;; `psubq`: result temporary typed `$I64X2`.
(decl psubq (Reg RegMem) Reg)
(rule (psubq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Psubq) lhs rhs))

;; `psubsb`: result temporary typed `$I8X16`.
(decl psubsb (Reg RegMem) Reg)
(rule (psubsb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubsb) lhs rhs))

;; `psubsw`: result temporary typed `$I16X8`.
(decl psubsw (Reg RegMem) Reg)
(rule (psubsw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubsw) lhs rhs))

;; `psubusb`: result temporary typed `$I8X16`.
(decl psubusb (Reg RegMem) Reg)
(rule (psubusb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubusb) lhs rhs))

;; `psubusw`: result temporary typed `$I16X8`.
(decl psubusw (Reg RegMem) Reg)
(rule (psubusw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubusw) lhs rhs))
|
||||
|
||||
;; `pavgb`: `xmm_rm_r` with `SseOpcode.Pavgb`; result temporary typed `$I8X16`.
(decl pavgb (Reg RegMem) Reg)
(rule (pavgb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pavgb) lhs rhs))

;; `pavgw`: `xmm_rm_r` with `SseOpcode.Pavgw`; result temporary typed `$I16X8`.
(decl pavgw (Reg RegMem) Reg)
(rule (pavgw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pavgw) lhs rhs))
|
||||
|
||||
;; `pand`: `xmm_rm_r` with `SseOpcode.Pand`.
;;
;; NOTE(review): the result temporary is typed `$F32X4` even though `pxor`
;; uses `$I8X16` — presumably harmless, but confirm the intended type.
(decl pand (Reg RegMem) Reg)
(rule (pand lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Pand) lhs rhs))

;; `andps`: `xmm_rm_r` with `SseOpcode.Andps`; result temporary typed `$F32X4`.
(decl andps (Reg RegMem) Reg)
(rule (andps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Andps) lhs rhs))

;; `andpd`: `xmm_rm_r` with `SseOpcode.Andpd`; result temporary typed `$F64X2`.
(decl andpd (Reg RegMem) Reg)
(rule (andpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Andpd) lhs rhs))
|
||||
|
||||
;; `por`: `xmm_rm_r` with `SseOpcode.Por`.
;;
;; NOTE(review): the result temporary is typed `$F32X4` even though `pxor`
;; uses `$I8X16` — presumably harmless, but confirm the intended type.
(decl por (Reg RegMem) Reg)
(rule (por lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Por) lhs rhs))

;; `orps`: `xmm_rm_r` with `SseOpcode.Orps`; result temporary typed `$F32X4`.
(decl orps (Reg RegMem) Reg)
(rule (orps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Orps) lhs rhs))

;; `orpd`: `xmm_rm_r` with `SseOpcode.Orpd`; result temporary typed `$F64X2`.
(decl orpd (Reg RegMem) Reg)
(rule (orpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Orpd) lhs rhs))
|
||||
|
||||
;; `pxor`: `xmm_rm_r` with `SseOpcode.Pxor`; result temporary typed `$I8X16`.
(decl pxor (Reg RegMem) Reg)
(rule (pxor lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pxor) lhs rhs))

;; `xorps`: `xmm_rm_r` with `SseOpcode.Xorps`; result temporary typed `$F32X4`.
(decl xorps (Reg RegMem) Reg)
(rule (xorps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Xorps) lhs rhs))

;; `xorpd`: `xmm_rm_r` with `SseOpcode.Xorpd`; result temporary typed `$F64X2`.
(decl xorpd (Reg RegMem) Reg)
(rule (xorpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Xorpd) lhs rhs))
|
||||
|
||||
;; `pmullw`: `xmm_rm_r` with `SseOpcode.Pmullw`; result temporary typed
;; `$I16X8`.
(decl pmullw (Reg RegMem) Reg)
(rule (pmullw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmullw) lhs rhs))
|
||||
|
||||
;; Helper for creating `pmulld` instructions.
;;
;; `pmulld` multiplies packed 32-bit lanes, so the result temporary is typed
;; `$I32X4` (it was previously `$I16X8`, apparently copied from `pmullw`).
;; The type is only passed to `xmm_rm_r` to allocate the temporary; it is not
;; part of the emitted `MInst.XmmRmR`.
(decl pmulld (Reg RegMem) Reg)
(rule (pmulld src1 src2)
      (xmm_rm_r $I32X4 (SseOpcode.Pmulld) src1 src2))
|
||||
|
||||
;; `pmulhw`: `xmm_rm_r` with `SseOpcode.Pmulhw`; result temporary typed
;; `$I16X8`.
(decl pmulhw (Reg RegMem) Reg)
(rule (pmulhw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmulhw) lhs rhs))

;; `pmulhuw`: `xmm_rm_r` with `SseOpcode.Pmulhuw`; result temporary typed
;; `$I16X8`.
(decl pmulhuw (Reg RegMem) Reg)
(rule (pmulhuw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmulhuw) lhs rhs))
|
||||
|
||||
;; Helper for creating `pmuldq` instructions.
;;
;; `pmuldq` produces packed 64-bit products, so the result temporary is typed
;; `$I64X2`, matching the unsigned sibling `pmuludq` (it was previously
;; `$I16X8`). The type is only passed to `xmm_rm_r` to allocate the temporary;
;; it is not part of the emitted `MInst.XmmRmR`.
(decl pmuldq (Reg RegMem) Reg)
(rule (pmuldq src1 src2)
      (xmm_rm_r $I64X2 (SseOpcode.Pmuldq) src1 src2))
|
||||
|
||||
;; `pmuludq`: `xmm_rm_r` with `SseOpcode.Pmuludq`; result temporary typed
;; `$I64X2`.
(decl pmuludq (Reg RegMem) Reg)
(rule (pmuludq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Pmuludq) lhs rhs))
|
||||
|
||||
;; `punpckhwd`: `xmm_rm_r` with `SseOpcode.Punpckhwd`; result temporary typed
;; `$I16X8`.
(decl punpckhwd (Reg RegMem) Reg)
(rule (punpckhwd lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Punpckhwd) lhs rhs))

;; `punpcklwd`: `xmm_rm_r` with `SseOpcode.Punpcklwd`; result temporary typed
;; `$I16X8`.
(decl punpcklwd (Reg RegMem) Reg)
(rule (punpcklwd lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Punpcklwd) lhs rhs))
|
||||
|
||||
;; `andnps`: `xmm_rm_r` with `SseOpcode.Andnps`; result temporary typed
;; `$F32X4`.
(decl andnps (Reg RegMem) Reg)
(rule (andnps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Andnps) lhs rhs))

;; `andnpd`: `xmm_rm_r` with `SseOpcode.Andnpd`; result temporary typed
;; `$F64X2`.
(decl andnpd (Reg RegMem) Reg)
(rule (andnpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Andnpd) lhs rhs))

;; `pandn`: `xmm_rm_r` with `SseOpcode.Pandn`.
;;
;; NOTE(review): the result temporary is typed `$F64X2` even though `pxor`
;; uses `$I8X16` — presumably harmless, but confirm the intended type.
(decl pandn (Reg RegMem) Reg)
(rule (pandn lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Pandn) lhs rhs))
|
||||
|
||||
;; Emit an `MInst.XmmRmRImm` (two sources plus an 8-bit immediate and an
;; operand size) into a fresh `$I8X16` temporary and return that temporary.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm opcode lhs rhs imm8 width)
      (let ((out WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmRmRImm opcode lhs rhs out imm8 width))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; `palignr`: `xmm_rm_r_imm` with `SseOpcode.Palignr`.
(decl palignr (Reg RegMem u8 OperandSize) Reg)
(rule (palignr lhs rhs imm8 width)
      (xmm_rm_r_imm (SseOpcode.Palignr) lhs rhs imm8 width))
|
||||
|
||||
;; Emit a `pshufd` as an `MInst.XmmRmRImm` writing a fresh `$I8X16` temporary.
;;
;; `pshufd` takes a single source, but `MInst.XmmRmRImm` has two source slots:
;; the freshly allocated temporary is passed as `src1` as well as `dst`.
;; NOTE(review): presumably this is fine because `pshufd` does not read its
;; destination — confirm.
(decl pshufd (RegMem u8 OperandSize) Reg)
(rule (pshufd operand imm8 width)
      (let ((out_w WritableReg (temp_writable_reg $I8X16))
            (out Reg (writable_reg_to_reg out_w))
            (_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pshufd)
                                           out
                                           operand
                                           out_w
                                           imm8
                                           width))))
        out))
|
||||
|
||||
;; Emit an `MInst.XmmUnaryRmR` (single-source SSE operation) into a fresh
;; `$I8X16` temporary and return that temporary.
(decl xmm_unary_rm_r (SseOpcode RegMem) Reg)
(rule (xmm_unary_rm_r opcode operand)
      (let ((out WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmUnaryRmR opcode operand out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; `pmovsxbw`: `xmm_unary_rm_r` with `SseOpcode.Pmovsxbw`.
(decl pmovsxbw (RegMem) Reg)
(rule (pmovsxbw operand)
      (xmm_unary_rm_r (SseOpcode.Pmovsxbw) operand))
|
||||
|
||||
;; `pmovzxbw`: `xmm_unary_rm_r` with `SseOpcode.Pmovzxbw`.
(decl pmovzxbw (RegMem) Reg)
(rule (pmovzxbw operand)
      (xmm_unary_rm_r (SseOpcode.Pmovzxbw) operand))
|
||||
|
||||
;; Emit an `MInst.XmmRmREvex` with the given AVX-512 opcode into a fresh
;; `$I8X16` temporary and return that temporary. Note the operand order: the
;; `RegMem` operand comes first and the `Reg` operand second.
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
(rule (xmm_rm_r_evex opcode lhs rhs)
      (let ((out WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmRmREvex opcode lhs rhs out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; `vpmullq`: `xmm_rm_r_evex` with `Avx512Opcode.Vpmullq`.
;;
;; Requires AVX-512 vl and dq.
(decl vpmullq (RegMem Reg) Reg)
(rule (vpmullq lhs rhs)
      (xmm_rm_r_evex (Avx512Opcode.Vpmullq) lhs rhs))
|
||||
|
||||
;; Emit an `MInst.XmmRmiReg` (second operand may be a register, memory or an
;; immediate) into a fresh `$I8X16` temporary and return that temporary.
(decl xmm_rmi_reg (SseOpcode Reg RegMemImm) Reg)
(rule (xmm_rmi_reg opcode lhs rhs)
      (let ((out WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmRmiReg opcode lhs rhs out))))
        (writable_reg_to_reg out)))
|
||||
|
||||
;; `psllq`: `xmm_rmi_reg` with `SseOpcode.Psllq`.
(decl psllq (Reg RegMemImm) Reg)
(rule (psllq operand amount)
      (xmm_rmi_reg (SseOpcode.Psllq) operand amount))
|
||||
|
||||
;; `psrlq`: `xmm_rmi_reg` with `SseOpcode.Psrlq`.
(decl psrlq (Reg RegMemImm) Reg)
(rule (psrlq operand amount)
      (xmm_rmi_reg (SseOpcode.Psrlq) operand amount))
|
||||
|
||||
;; Emit an `MInst.MulHi` (signed or unsigned, per the `bool`) writing two fresh
;; temporaries of type `ty`.
;;
;; The returned `ValueRegs` holds the low half of the product first and the
;; high half second.
(decl mul_hi (Type bool Reg RegMem) ValueRegs)
(rule (mul_hi ty is_signed lhs rhs)
      (let ((lo WritableReg (temp_writable_reg ty))
            (hi WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty))
            (_ Unit (emit (MInst.MulHi width is_signed lhs rhs lo hi))))
        (value_regs (writable_reg_to_reg lo)
                    (writable_reg_to_reg hi))))
|
||||
|
||||
;; Unsigned widening multiply: `mul_hi` with the `signed` flag set to `$false`.
;; Returns both the lower and the (unsigned) higher halves of the result.
(decl mulhi_u (Type Reg RegMem) ValueRegs)
(rule (mulhi_u ty lhs rhs)
      (mul_hi ty $false lhs rhs))
|
||||
@@ -1,14 +1,13 @@
|
||||
//! Instruction operand sub-components (aka "parts"): definitions and printing.
|
||||
|
||||
use super::regs::{self, show_ireg_sized};
|
||||
use super::EmitState;
|
||||
use super::{EmitState, RegMapper};
|
||||
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||
use crate::ir::{MemFlags, Type};
|
||||
use crate::isa::x64::inst::Inst;
|
||||
use crate::machinst::*;
|
||||
use regalloc::{
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
|
||||
RegUsageMapper, Writable,
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable,
|
||||
};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt;
|
||||
@@ -175,7 +174,7 @@ impl SyntheticAmode {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
pub(crate) fn map_uses<RM: RegMapper>(&mut self, map: &RM) {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.map_uses(map),
|
||||
SyntheticAmode::NominalSPOffset { .. } => {
|
||||
@@ -285,6 +284,25 @@ impl PrettyPrintSized for RegMemImm {
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an 8-bit integer immediate or a register.
///
/// Used as the `num_bits` operand of `Inst::ShiftR`; the emission code for
/// that instruction debug-asserts that the register form holds `rcx`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Imm8Reg {
|
||||
/// The immediate form, carrying the raw `u8` value.
Imm8 { imm: u8 },
|
||||
/// The register form, carrying the register that holds the value.
Reg { reg: Reg },
|
||||
}
|
||||
|
||||
// Conversion from a raw byte: always produces the immediate variant.
impl From<u8> for Imm8Reg {
|
||||
/// Wraps `imm` as `Imm8Reg::Imm8 { imm }`.
fn from(imm: u8) -> Self {
|
||||
Self::Imm8 { imm }
|
||||
}
|
||||
}
|
||||
|
||||
// Conversion from a register: always produces the register variant.
impl From<Reg> for Imm8Reg {
|
||||
/// Wraps `reg` as `Imm8Reg::Reg { reg }`.
fn from(reg: Reg) -> Self {
|
||||
Self::Reg { reg }
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32, 64, or 128 bit value.
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -147,14 +147,16 @@ pub(crate) fn emit(
|
||||
Inst::AluRmiR {
|
||||
size,
|
||||
op,
|
||||
src,
|
||||
src1,
|
||||
src2,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, reg_g.to_reg());
|
||||
let mut rex = RexFlags::from(*size);
|
||||
if *op == AluRmiROpcode::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
@@ -213,7 +215,7 @@ pub(crate) fn emit(
|
||||
};
|
||||
assert!(!(is_8bit && *size == OperandSize::Size64));
|
||||
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
if is_8bit {
|
||||
rex.always_emit_if_8bit_needed(*reg_e);
|
||||
@@ -323,8 +325,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Not { size, src } => {
|
||||
let rex_flags = RexFlags::from((*size, src.to_reg()));
|
||||
Inst::Not { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -333,12 +336,13 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = 2;
|
||||
let enc_src = int_reg_enc(src.to_reg());
|
||||
let enc_src = int_reg_enc(dst.to_reg());
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
|
||||
}
|
||||
|
||||
Inst::Neg { size, src } => {
|
||||
let rex_flags = RexFlags::from((*size, src.to_reg()));
|
||||
Inst::Neg { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -347,15 +351,21 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = 3;
|
||||
let enc_src = int_reg_enc(src.to_reg());
|
||||
let enc_src = int_reg_enc(dst.to_reg());
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
|
||||
}
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
dividend,
|
||||
divisor,
|
||||
dst_quotient,
|
||||
dst_remainder,
|
||||
} => {
|
||||
debug_assert_eq!(*dividend, regs::rax());
|
||||
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -397,7 +407,18 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
Inst::MulHi {
|
||||
size,
|
||||
signed,
|
||||
src1,
|
||||
src2,
|
||||
dst_lo,
|
||||
dst_hi,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, regs::rax());
|
||||
debug_assert_eq!(dst_lo.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_hi.to_reg(), regs::rdx());
|
||||
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
let prefix = match size {
|
||||
OperandSize::Size16 => LegacyPrefixes::_66,
|
||||
@@ -407,7 +428,7 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = if *signed { 5 } else { 4 };
|
||||
match rhs {
|
||||
match src2 {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
@@ -421,28 +442,39 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::SignExtendData { size } => match size {
|
||||
OperandSize::Size8 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x98);
|
||||
Inst::SignExtendData { size, src, dst } => {
|
||||
debug_assert_eq!(*src, regs::rax());
|
||||
debug_assert_eq!(dst.to_reg(), regs::rdx());
|
||||
match size {
|
||||
OperandSize::Size8 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x98);
|
||||
}
|
||||
OperandSize::Size16 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
OperandSize::Size32 => sink.put1(0x99),
|
||||
OperandSize::Size64 => {
|
||||
sink.put1(0x48);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
}
|
||||
OperandSize::Size16 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
OperandSize::Size32 => sink.put1(0x99),
|
||||
OperandSize::Size64 => {
|
||||
sink.put1(0x48);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
kind,
|
||||
size,
|
||||
dividend,
|
||||
divisor,
|
||||
tmp,
|
||||
dst_quotient,
|
||||
dst_remainder,
|
||||
} => {
|
||||
debug_assert_eq!(*dividend, regs::rax());
|
||||
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
|
||||
|
||||
// Generates the following code sequence:
|
||||
//
|
||||
// ;; check divide by zero:
|
||||
@@ -792,9 +824,11 @@ pub(crate) fn emit(
|
||||
Inst::ShiftR {
|
||||
size,
|
||||
kind,
|
||||
src,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::RotateLeft => 0,
|
||||
ShiftKind::RotateRight => 1,
|
||||
@@ -805,7 +839,8 @@ pub(crate) fn emit(
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
match num_bits {
|
||||
None => {
|
||||
Imm8Reg::Reg { reg } => {
|
||||
debug_assert_eq!(*reg, regs::rcx());
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xD2, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xD3, LegacyPrefixes::_66),
|
||||
@@ -820,7 +855,7 @@ pub(crate) fn emit(
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
|
||||
}
|
||||
|
||||
Some(num_bits) => {
|
||||
Imm8Reg::Imm8 { imm: num_bits } => {
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xC0, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xC1, LegacyPrefixes::_66),
|
||||
@@ -840,10 +875,16 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::XmmRmiReg { opcode, src, dst } => {
|
||||
Inst::XmmRmiReg {
|
||||
opcode,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, dst.to_reg());
|
||||
let rex = RexFlags::clear_w();
|
||||
let prefix = LegacyPrefixes::_66;
|
||||
if let RegMemImm::Imm { simm32 } = src {
|
||||
if let RegMemImm::Imm { simm32 } = src2 {
|
||||
let (opcode_bytes, reg_digit) = match opcode {
|
||||
SseOpcode::Psllw => (0x0F71, 6),
|
||||
SseOpcode::Pslld => (0x0F72, 6),
|
||||
@@ -874,7 +915,7 @@ pub(crate) fn emit(
|
||||
_ => panic!("invalid opcode: {}", opcode),
|
||||
};
|
||||
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
|
||||
}
|
||||
@@ -993,9 +1034,11 @@ pub(crate) fn emit(
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
src,
|
||||
consequent,
|
||||
alternative,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*alternative, reg_g.to_reg());
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
let prefix = match size {
|
||||
OperandSize::Size16 => LegacyPrefixes::_66,
|
||||
@@ -1004,7 +1047,7 @@ pub(crate) fn emit(
|
||||
_ => unreachable!("invalid size spec for cmove"),
|
||||
};
|
||||
let opcode = 0x0F40 + cc.get_enc() as u32;
|
||||
match src {
|
||||
match consequent {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
|
||||
}
|
||||
@@ -1433,9 +1476,11 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmR {
|
||||
op,
|
||||
src: src_e,
|
||||
src1,
|
||||
src2: src_e,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, reg_g.to_reg());
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode, length) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
|
||||
@@ -1678,11 +1723,13 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmRImm {
|
||||
op,
|
||||
src,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
imm,
|
||||
size,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, dst.to_reg());
|
||||
let (prefix, opcode, len) = match op {
|
||||
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
||||
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
|
||||
@@ -1713,7 +1760,7 @@ pub(crate) fn emit(
|
||||
// `src` in ModRM's r/m field.
|
||||
_ => false,
|
||||
};
|
||||
match src {
|
||||
match src2 {
|
||||
RegMem::Reg { reg } => {
|
||||
if regs_swapped {
|
||||
emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
|
||||
@@ -2403,8 +2450,17 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::LockCmpxchg { ty, src, dst } => {
|
||||
// lock cmpxchg{b,w,l,q} %src, (dst)
|
||||
Inst::LockCmpxchg {
|
||||
ty,
|
||||
replacement,
|
||||
expected,
|
||||
mem,
|
||||
dst_old,
|
||||
} => {
|
||||
debug_assert_eq!(*expected, regs::rax());
|
||||
debug_assert_eq!(dst_old.to_reg(), regs::rax());
|
||||
|
||||
// lock cmpxchg{b,w,l,q} %replacement, (mem)
|
||||
// Note that 0xF0 is the Lock prefix.
|
||||
let (prefix, opcodes) = match *ty {
|
||||
types::I8 => (LegacyPrefixes::_F0, 0x0FB0),
|
||||
@@ -2413,12 +2469,34 @@ pub(crate) fn emit(
|
||||
types::I64 => (LegacyPrefixes::_F0, 0x0FB1),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rex = RexFlags::from((OperandSize::from_ty(*ty), *src));
|
||||
let amode = dst.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
|
||||
let rex = RexFlags::from((OperandSize::from_ty(*ty), *replacement));
|
||||
let amode = mem.finalize(state, sink);
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcodes,
|
||||
2,
|
||||
*replacement,
|
||||
&amode,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { ty, op } => {
|
||||
Inst::AtomicRmwSeq {
|
||||
ty,
|
||||
op,
|
||||
address,
|
||||
operand,
|
||||
temp,
|
||||
dst_old,
|
||||
} => {
|
||||
debug_assert_eq!(*address, regs::r9());
|
||||
debug_assert_eq!(*operand, regs::r10());
|
||||
debug_assert_eq!(temp.to_reg(), regs::r11());
|
||||
debug_assert_eq!(dst_old.to_reg(), regs::rax());
|
||||
|
||||
// Emit this:
|
||||
//
|
||||
// mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
|
||||
@@ -2516,8 +2594,10 @@ pub(crate) fn emit(
|
||||
// No need to call `add_trap` here, since the `i4` emit will do that.
|
||||
let i4 = Inst::LockCmpxchg {
|
||||
ty: *ty,
|
||||
src: r11,
|
||||
dst: amode.into(),
|
||||
replacement: r11,
|
||||
expected: regs::rax(),
|
||||
mem: amode.into(),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
};
|
||||
i4.emit(sink, info, state);
|
||||
|
||||
|
||||
@@ -4199,8 +4199,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rbx,
|
||||
dst: am1,
|
||||
mem: am1,
|
||||
replacement: rbx,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0410FB09C9241010000",
|
||||
"lock cmpxchgb %bl, 321(%r10,%rdx,4)",
|
||||
@@ -4209,8 +4211,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rdx,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rdx,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F00FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
|
||||
@@ -4218,8 +4222,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0400FB0B4F1C7CFFFFF",
|
||||
"lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
|
||||
@@ -4227,8 +4233,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
|
||||
@@ -4236,8 +4244,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r15,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r15,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB0BCF1C7CFFFFF",
|
||||
"lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
|
||||
@@ -4246,8 +4256,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"66F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
|
||||
@@ -4255,8 +4267,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"66F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
|
||||
@@ -4265,8 +4279,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
|
||||
@@ -4274,8 +4290,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
|
||||
@@ -4284,8 +4302,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0480FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
|
||||
@@ -4293,8 +4313,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F04C0FB194F1C7CFFFFF",
|
||||
"lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
|
||||
@@ -4302,27 +4324,62 @@ fn test_x64_emit() {
|
||||
|
||||
// AtomicRmwSeq
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I8,
|
||||
op: inst_common::AtomicRmwOp::Or,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
|
||||
"atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I16,
|
||||
op: inst_common::AtomicRmwOp::And,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
|
||||
"atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I32,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"418B014989C34D89D3F0450FB1190F85EFFFFFFF",
|
||||
"atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Umin, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I32,
|
||||
op: inst_common::AtomicRmwOp::Umin,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"418B014989C34539DA4D0F46DAF0450FB1190F85EBFFFFFF",
|
||||
"atomically { 32_bits_at_[%r9]) Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I64,
|
||||
op: inst_common::AtomicRmwOp::Add,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
|
||||
"atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
947
cranelift/codegen/src/isa/x64/lower.isle
Normal file
947
cranelift/codegen/src/isa/x64/lower.isle
Normal file
@@ -0,0 +1,947 @@
|
||||
;; x86-64 instruction selection and CLIF-to-MachInst lowering.
|
||||
|
||||
;; The main lowering constructor term: takes a clif `Inst` and returns the
|
||||
;; register(s) within which the lowered instruction's result values live.
|
||||
(decl lower (Inst) ValueRegs)
|
||||
|
||||
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iconst (u64_from_imm64 x))))
|
||||
(value_reg (imm ty x)))
|
||||
|
||||
;; `i128`
;;
;; The 64-bit immediate `x` is materialized as an `$I64` constant in the first
;; (low) register and a constant zero is placed in the second (high) register.
;; NOTE(review): the high half is always zero, i.e. the immediate is not
;; sign-extended into it -- confirm this matches the intended `iconst.i128`
;; semantics.
|
||||
(rule (lower (has_type $I128
|
||||
(iconst (u64_from_imm64 x))))
|
||||
(value_regs (imm $I64 x)
|
||||
(imm $I64 0)))
|
||||
|
||||
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `b64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bconst $false)))
|
||||
(value_reg (imm ty 0)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bconst $true)))
|
||||
(value_reg (imm ty 1)))
|
||||
|
||||
;; `b128`
|
||||
|
||||
(rule (lower (has_type $B128
|
||||
(bconst $false)))
|
||||
(value_regs (imm $B64 0)
|
||||
(imm $B64 0)))
|
||||
|
||||
(rule (lower (has_type $B128
|
||||
(bconst $true)))
|
||||
(value_regs (imm $B64 1)
|
||||
(imm $B64 0)))
|
||||
|
||||
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type ty (null)))
|
||||
(value_reg (imm ty 0)))
|
||||
|
||||
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Add two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Add a register and an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x (simm32_from_value y))))
|
||||
(value_reg (add ty (put_in_reg x) y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd (simm32_from_value x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Add a register and memory.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x (sinkable_load y))))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd (sinkable_load x) y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(iadd x y)))
|
||||
(value_reg (paddb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(iadd x y)))
|
||||
(value_reg (paddw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(iadd x y)))
|
||||
(value_reg (paddd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(iadd x y)))
|
||||
(value_reg (paddq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (iadd x y)))
|
||||
;; Get the high/low registers for `x`.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1)))
|
||||
;; Get the high/low registers for `y`.
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
;; Do an add followed by an add-with-carry.
|
||||
(with_flags (add_with_flags $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(adc $I64 x_hi (RegMemImm.Reg y_hi))))))
|
||||
|
||||
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Add two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Add a register and an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x (simm32_from_value y))))
|
||||
(value_reg (add ty (put_in_reg x) y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout (simm32_from_value x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Add a register and memory.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x (sinkable_load y))))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout (sinkable_load x) y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; (No `iadd_ifcout` for `i128`.)
|
||||
|
||||
;;;; Rules for `iadd_imm` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; When the immediate fits in a `RegMemImm.Imm`, use that.
|
||||
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (simm32_from_imm64 x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Otherwise, put the immediate into a register.
|
||||
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (u64_from_imm64 x) y)))
|
||||
(value_reg (add ty (put_in_reg y) (RegMemImm.Reg (imm ty x)))))
|
||||
|
||||
;; `i128`
;;
;; Both rules below add the immediate to the low half of `y` with
;; `add_with_flags`, then add an immediate zero to the high half with `adc`,
;; so only the carry out of the low half reaches the high half.
;; NOTE(review): because the high-half addend is always 0, the immediate is
;; not sign-extended into the high half -- confirm this is intended for
;; negative immediates.
|
||||
|
||||
;; When the immediate fits in a `RegMemImm.Imm`, use that.
|
||||
(rule (lower (has_type $I128 (iadd_imm (simm32_from_imm64 x) y)))
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
(with_flags (add_with_flags $I64 y_lo x)
|
||||
(adc $I64 y_hi (RegMemImm.Imm 0)))))
|
||||
|
||||
;; Otherwise, put the immediate into a register.
|
||||
(rule (lower (has_type $I128 (iadd_imm (u64_from_imm64 x) y)))
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1))
|
||||
(x_lo Reg (imm $I64 x)))
|
||||
(with_flags (add_with_flags $I64 y_lo (RegMemImm.Reg x_lo))
|
||||
(adc $I64 y_hi (RegMemImm.Imm 0)))))
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Sub two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x y)))
|
||||
(value_reg (sub ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Sub a register and an immediate.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x (simm32_from_value y))))
|
||||
(value_reg (sub ty (put_in_reg x) y)))
|
||||
|
||||
;; Sub a register and memory.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x (sinkable_load y))))
|
||||
(value_reg (sub ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(isub x y)))
|
||||
(value_reg (psubb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(isub x y)))
|
||||
(value_reg (psubw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(isub x y)))
|
||||
(value_reg (psubd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(isub x y)))
|
||||
(value_reg (psubq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (isub x y)))
|
||||
;; Get the high/low registers for `x`.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1)))
|
||||
;; Get the high/low registers for `y`.
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
;; Do a sub on the low halves followed by a sub-with-borrow on the high halves.
|
||||
(with_flags (sub_with_flags $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(sbb $I64 x_hi (RegMemImm.Reg y_hi))))))
|
||||
|
||||
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; And two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (band x y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; And with a memory operand.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band x (sinkable_load y))))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band (sinkable_load x) y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; And with an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band x (simm32_from_value y))))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band (simm32_from_value x) y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg y)
|
||||
x)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $F32X4 (band x y)))
|
||||
(value_reg (andps (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type $F64X2 (band x y)))
|
||||
(value_reg (andpd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane _bits _lanes)
|
||||
(band x y)))
|
||||
(value_reg (pand (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
(rule (lower (has_type $I128 (band x y)))
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
(value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(m_and $I64 x_hi (RegMemImm.Reg y_hi)))))
|
||||
|
||||
(rule (lower (has_type $B128 (band x y)))
|
||||
;; Booleans are always `0` or `1`, so we only need to do the `and` on the
|
||||
;; low half. The high half is always zero but, rather than generate a new
|
||||
;; zero, we just reuse `x`'s high half which is already zero.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_lo Reg (lo_reg y)))
|
||||
(value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
x_hi)))
|
||||
|
||||
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Or two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (bor x y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Or with a memory operand.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor x (sinkable_load y))))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor (sinkable_load x) y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; Or with an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor x (simm32_from_value y))))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor (simm32_from_value x) y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg y)
|
||||
x)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $F32X4 (bor x y)))
|
||||
(value_reg (orps (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type $F64X2 (bor x y)))
|
||||
(value_reg (orpd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane _bits _lanes)
|
||||
(bor x y)))
|
||||
(value_reg (por (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
;; Helper for a 128-bit bitwise or over two register pairs.
;;
;; Splits `x` and `y` into their low (index 0) and high (index 1) registers,
;; ors the low halves and the high halves independently as `$I64` values, and
;; returns the two results as a (lo, hi) pair.
(decl or_i128 (ValueRegs ValueRegs) ValueRegs)
|
||||
(rule (or_i128 x y)
|
||||
(let ((x_lo Reg (value_regs_get x 0))
|
||||
(x_hi Reg (value_regs_get x 1))
|
||||
(y_lo Reg (value_regs_get y 0))
|
||||
(y_hi Reg (value_regs_get y 1)))
|
||||
(value_regs (or $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(or $I64 x_hi (RegMemImm.Reg y_hi)))))
|
||||
|
||||
;; `i128`: delegate to `or_i128` on the two register pairs.
(rule (lower (has_type $I128 (bor lhs rhs)))
      (or_i128 (put_in_regs lhs) (put_in_regs rhs)))

;; `b128`: booleans are always `0` or `1`, so only the low halves need to be
;; or'd. The high half of the result is always zero; instead of materializing
;; a fresh zero we hand back the high half of the first operand, which is
;; already zero.
(rule (lower (has_type $B128 (bor lhs rhs)))
      (let ((lhs_regs ValueRegs (put_in_regs lhs))
            (lhs_lo Reg (value_regs_get lhs_regs 0))
            (lhs_hi Reg (value_regs_get lhs_regs 1))
            (rhs_lo Reg (lo_reg rhs)))
        (value_regs (or $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    lhs_hi)))
|
||||
|
||||
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Xor two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (bxor lhs rhs)))
      (value_reg (xor ty
                      (put_in_reg lhs)
                      (RegMemImm.Reg (put_in_reg rhs)))))

;; Xor a register with a memory operand. Either side of the `bxor` may be the
;; sinkable load; the other side is always placed in a register and used as
;; the first operand.

(rule (lower (has_type (fits_in_64 ty)
                       (bxor reg_val (sinkable_load mem_val))))
      (value_reg (xor ty
                      (put_in_reg reg_val)
                      (sink_load mem_val))))

(rule (lower (has_type (fits_in_64 ty)
                       (bxor (sinkable_load mem_val) reg_val)))
      (value_reg (xor ty
                      (put_in_reg reg_val)
                      (sink_load mem_val))))

;; Xor a register with an immediate. Either side of the `bxor` may match
;; `simm32_from_value`; the matched operand is passed through unchanged as the
;; second operand.

(rule (lower (has_type (fits_in_64 ty)
                       (bxor reg_val (simm32_from_value imm_val))))
      (value_reg (xor ty
                      (put_in_reg reg_val)
                      imm_val)))

(rule (lower (has_type (fits_in_64 ty)
                       (bxor (simm32_from_value imm_val) reg_val)))
      (value_reg (xor ty
                      (put_in_reg reg_val)
                      imm_val)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
;; `f32x4` uses `xorps`.
(rule (lower (has_type $F32X4 (bxor lhs rhs)))
      (value_reg (xorps (put_in_reg lhs)
                        (put_in_reg_mem rhs))))

;; `f64x2` uses `xorpd`.
(rule (lower (has_type $F64X2 (bxor lhs rhs)))
      (value_reg (xorpd (put_in_reg lhs)
                        (put_in_reg_mem rhs))))

;; Any other multi-lane type uses `pxor`.
(rule (lower (has_type (multi_lane _bits _lanes)
                       (bxor lhs rhs)))
      (value_reg (pxor (put_in_reg lhs)
                       (put_in_reg_mem rhs))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
;; `i128`: xor the low halves together and the high halves together, each with
;; a 64-bit `xor`.
(rule (lower (has_type $I128 (bxor lhs rhs)))
      (let ((lhs_regs ValueRegs (put_in_regs lhs))
            (lhs_lo Reg (value_regs_get lhs_regs 0))
            (lhs_hi Reg (value_regs_get lhs_regs 1))
            (rhs_regs ValueRegs (put_in_regs rhs))
            (rhs_lo Reg (value_regs_get rhs_regs 0))
            (rhs_hi Reg (value_regs_get rhs_regs 1)))
        (value_regs (xor $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    (xor $I64 lhs_hi (RegMemImm.Reg rhs_hi)))))

;; `b128`: booleans are always `0` or `1`, so only the low halves need to be
;; xor'd. The high half of the result is always zero; instead of materializing
;; a fresh zero we hand back the high half of the first operand, which is
;; already zero.
(rule (lower (has_type $B128 (bxor lhs rhs)))
      (let ((lhs_regs ValueRegs (put_in_regs lhs))
            (lhs_lo Reg (value_regs_get lhs_regs 0))
            (lhs_hi Reg (value_regs_get lhs_regs 1))
            (rhs_lo Reg (lo_reg rhs)))
        (value_regs (xor $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    lhs_hi)))
|
||||
|
||||
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Shift amount in a register. Only the low register of the amount is read:
;; the shift amount is logically masked to the value's bit width, so any
;; higher bits are irrelevant.
(rule (lower (has_type (fits_in_64 ty) (ishl val count)))
      (let ((count_lo Reg (lo_reg count)))
        (value_reg (shl ty (put_in_reg val) (Imm8Reg.Reg count_lo)))))

;; Shift amount matched by `imm8_from_value`: pass it straight to `shl`.
(rule (lower (has_type (fits_in_64 ty) (ishl val (imm8_from_value count))))
      (value_reg (shl ty (put_in_reg val) count)))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
;; Shift a 128-bit value (held as a lo/hi register pair) left by the amount
;; in `amt`, producing a new lo/hi register pair.
(decl shl_i128 (ValueRegs Reg) ValueRegs)
(rule (shl_i128 src amt)
      (let (;; Split the 128-bit input into its two 64-bit halves.
            (lo Reg (value_regs_get src 0))
            (hi Reg (value_regs_get src 1))
            ;; Shift each half left independently.
            (lo_shl Reg (shl $I64 lo (Imm8Reg.Reg amt)))
            (hi_shl Reg (shl $I64 hi (Imm8Reg.Reg amt)))
            ;; `lo >> (64 - amt)`: the bits that move out of the lo half and
            ;; must be carried into the hi half.
            (spill Reg (shr $I64 lo (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt)))))
            (zero Reg (imm $I64 0))
            ;; Nullify the carry if we are shifting by a multiple of 128
            ;; (i.e. the low seven bits of `amt` are all zero).
            (spill_masked Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt)
                                            (cmove $I64 (CC.Z) (RegMem.Reg zero) spill)))
            ;; Merge the carried bits into the shifted hi half.
            (hi_merged Reg (or $I64 spill_masked (RegMemImm.Reg hi_shl))))
        ;; Assemble the result from the two shifted halves. When the shift is
        ;; by >= 64 (modulo 128), the lo half of the result is zero and the hi
        ;; half of the result is the shifted lo half instead.
        (with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt)
                      (cmove $I64 (CC.Z) (RegMem.Reg lo_shl) zero)
                      (cmove $I64 (CC.Z) (RegMem.Reg hi_merged) lo_shl))))
|
||||
|
||||
;; Only the low register of the amount is read: the shift amount is logically
;; masked to the value's bit width, so any higher bits are irrelevant.
(rule (lower (has_type $I128 (ishl val count)))
      (let ((count_lo Reg (lo_reg count)))
        (shl_i128 (put_in_regs val) count_lo)))
|
||||
|
||||
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Shift amount in a register. The value being shifted is first put in a
;; register via `extend_to_reg` with `ExtendKind.Zero`.
(rule (lower (has_type (fits_in_64 ty) (ushr val count)))
      (let ((val_ext Reg (extend_to_reg val ty (ExtendKind.Zero)))
            ;; Only the low register of the amount is read: the shift amount
            ;; is logically masked to the value's bit width.
            (count_lo Reg (lo_reg count)))
        (value_reg (shr ty val_ext (Imm8Reg.Reg count_lo)))))

;; Shift amount matched by `imm8_from_value`: pass it straight to `shr`.
(rule (lower (has_type (fits_in_64 ty) (ushr val (imm8_from_value count))))
      (let ((val_ext Reg (extend_to_reg val ty (ExtendKind.Zero))))
        (value_reg (shr ty val_ext count))))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
;; Logically shift a 128-bit value (held as a lo/hi register pair) right by
;; the amount in `amt`, producing a new lo/hi register pair.
(decl shr_i128 (ValueRegs Reg) ValueRegs)
(rule (shr_i128 src amt)
      (let (;; Split the 128-bit input into its two 64-bit halves.
            (lo Reg (value_regs_get src 0))
            (hi Reg (value_regs_get src 1))
            ;; Shift each half right independently.
            (lo_shr Reg (shr $I64 lo (Imm8Reg.Reg amt)))
            (hi_shr Reg (shr $I64 hi (Imm8Reg.Reg amt)))
            ;; `hi << (64 - amt)`: the bits that move out of the hi half and
            ;; must be carried into the lo half.
            (spill Reg (shl $I64 hi (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt)))))
            ;; Nullify the carry if we are shifting by a multiple of 128
            ;; (i.e. the low seven bits of `amt` are all zero).
            (spill_masked Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt)
                                            (cmove $I64 (CC.Z) (RegMem.Reg (imm $I64 0)) spill)))
            ;; Merge the carried bits into the shifted lo half.
            (lo_merged Reg (or $I64 spill_masked (RegMemImm.Reg lo_shr))))
        ;; Assemble the result from the two shifted halves. When the shift is
        ;; by >= 64 (modulo 128), the hi half of the result is zero and the lo
        ;; half of the result is the shifted hi half instead.
        (with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt)
                      (cmove $I64 (CC.Z) (RegMem.Reg lo_merged) hi_shr)
                      (cmove $I64 (CC.Z) (RegMem.Reg hi_shr) (imm $I64 0)))))
|
||||
|
||||
;; Only the low register of the amount is read: the shift amount is logically
;; masked to the value's bit width, so any higher bits are irrelevant.
(rule (lower (has_type $I128 (ushr val count)))
      (let ((count_lo Reg (lo_reg count)))
        (shr_i128 (put_in_regs val) count_lo)))
|
||||
|
||||
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Rotation amount in a register. Only the low register of the amount is
;; read: the amount is logically masked to the value's bit width, so any
;; higher bits are irrelevant.
(rule (lower (has_type (fits_in_64 ty) (rotl val count)))
      (let ((count_lo Reg (lo_reg count)))
        (value_reg (m_rotl ty (put_in_reg val) (Imm8Reg.Reg count_lo)))))

;; Rotation amount matched by `imm8_from_value`: pass it straight to `m_rotl`.
(rule (lower (has_type (fits_in_64 ty) (rotl val (imm8_from_value count))))
      (value_reg (m_rotl ty (put_in_reg val) count)))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
;; A 128-bit rotate-left is built from the 128-bit helpers:
;;
;;     (val << amt) | (val >> (128 - amt))
(rule (lower (has_type $I128 (rotl val count)))
      (let ((val_regs ValueRegs (put_in_regs val))
            ;; Only the low register of the amount is read: the rotation
            ;; amount is logically masked to the value's bit width.
            (count_lo Reg (lo_reg count)))
        (or_i128 (shl_i128 val_regs count_lo)
                 (shr_i128 val_regs (sub $I64 (imm $I64 128) (RegMemImm.Reg count_lo))))))
|
||||
|
||||
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; 16 lanes of 8 bits: `pavgb`.
(rule (lower (has_type (multi_lane 8 16)
                       (avg_round lhs rhs)))
      (value_reg (pavgb (put_in_reg lhs) (put_in_reg_mem rhs))))

;; 8 lanes of 16 bits: `pavgw`.
(rule (lower (has_type (multi_lane 16 8)
                       (avg_round lhs rhs)))
      (value_reg (pavgw (put_in_reg lhs) (put_in_reg_mem rhs))))
|
||||
|
||||
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Multiply two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (imul lhs rhs)))
      (value_reg (mul ty
                      (put_in_reg lhs)
                      (RegMemImm.Reg (put_in_reg rhs)))))

;; Multiply a register and an immediate. Either side of the `imul` may match
;; `simm32_from_value`; the matched operand is passed through unchanged as the
;; second operand.

(rule (lower (has_type (fits_in_64 ty)
                       (imul reg_val (simm32_from_value imm_val))))
      (value_reg (mul ty (put_in_reg reg_val) imm_val)))

(rule (lower (has_type (fits_in_64 ty)
                       (imul (simm32_from_value imm_val) reg_val)))
      (value_reg (mul ty (put_in_reg reg_val) imm_val)))

;; Multiply a register and a memory load. Either side of the `imul` may be the
;; sinkable load; the other side is always placed in a register and used as
;; the first operand.

(rule (lower (has_type (fits_in_64 ty)
                       (imul reg_val (sinkable_load mem_val))))
      (value_reg (mul ty
                      (put_in_reg reg_val)
                      (sink_load mem_val))))

(rule (lower (has_type (fits_in_64 ty)
                       (imul (sinkable_load mem_val) reg_val)))
      (value_reg (mul ty
                      (put_in_reg reg_val)
                      (sink_load mem_val))))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
;; mul:
|
||||
;; dst_lo = lhs_lo * rhs_lo
|
||||
;; dst_hi = umulhi(lhs_lo, rhs_lo) +
|
||||
;; lhs_lo * rhs_hi +
|
||||
;; lhs_hi * rhs_lo
|
||||
;;
|
||||
;; so we emit:
|
||||
;; lo_hi = mul x_lo, y_hi
|
||||
;; hi_lo = mul x_hi, y_lo
|
||||
;; hilo_hilo = add lo_hi, hi_lo
|
||||
;; dst_lo:hi_lolo = mulhi_u x_lo, y_lo
|
||||
;; dst_hi = add hilo_hilo, hi_lolo
|
||||
;; return (dst_lo, dst_hi)
|
||||
;; 128-bit multiply built from 64-bit pieces. Writing the operands as lo/hi
;; halves `a` and `b`:
;;
;;     result_lo = low 64 bits of (a_lo * b_lo)
;;     result_hi = high 64 bits of (a_lo * b_lo) + a_lo * b_hi + a_hi * b_lo
(rule (lower (has_type $I128 (imul lhs rhs)))
      (let (;; Left operand as a register pair, split into halves.
            (a ValueRegs (put_in_regs lhs))
            (a_lo Reg (value_regs_get a 0))
            (a_hi Reg (value_regs_get a 1))
            ;; Right operand as a register pair, split into halves.
            (b ValueRegs (put_in_regs rhs))
            (b_lo Reg (value_regs_get b 0))
            (b_hi Reg (value_regs_get b 1))
            ;; The two cross products: a_lo * b_hi and a_hi * b_lo.
            (cross_lo_hi Reg (mul $I64 a_lo (RegMemImm.Reg b_hi)))
            (cross_hi_lo Reg (mul $I64 a_hi (RegMemImm.Reg b_lo)))
            ;; Sum of the cross products.
            (cross_sum Reg (add $I64 cross_lo_hi (RegMemImm.Reg cross_hi_lo)))
            ;; Widening multiply of the low halves: register 0 of the pair is
            ;; the low half of the final result, register 1 is the part that
            ;; still has to be added into the high half.
            (wide ValueRegs (mulhi_u $I64 a_lo (RegMem.Reg b_lo)))
            (result_lo Reg (value_regs_get wide 0))
            (wide_hi Reg (value_regs_get wide 1))
            ;; High half of the result: cross products plus the widening
            ;; multiply's second register.
            (result_hi Reg (add $I64 cross_sum (RegMemImm.Reg wide_hi))))
        (value_regs result_lo result_hi)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
;; (No i8x16 multiply.)
|
||||
|
||||
;; `i16x8`: `pmullw`.
(rule (lower (has_type (multi_lane 16 8) (imul lhs rhs)))
      (value_reg (pmullw (put_in_reg lhs) (put_in_reg_mem rhs))))

;; `i32x4`: `pmulld`.
(rule (lower (has_type (multi_lane 32 4) (imul lhs rhs)))
      (value_reg (pmulld (put_in_reg lhs) (put_in_reg_mem rhs))))

;; `i64x2` when both AVX-512 VL and AVX-512 DQ are enabled: a single
;; `vpmullq`. Note that here the first operand is the reg-or-mem one and the
;; second is the register.
(rule (lower (has_type (and (avx512vl_enabled)
                            (avx512dq_enabled)
                            (multi_lane 64 2))
                       (imul lhs rhs)))
      (value_reg (vpmullq (put_in_reg_mem lhs) (put_in_reg rhs))))
|
||||
|
||||
;; Otherwise, for i64x2 multiplication we describe a lane A as being composed of
|
||||
;; a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand
|
||||
;; multiplication can then be written as:
|
||||
;;
|
||||
;; Ah Al
|
||||
;; * Bh Bl
|
||||
;; -----
|
||||
;; Al * Bl
|
||||
;; + (Ah * Bl) << 32
|
||||
;; + (Al * Bh) << 32
|
||||
;;
|
||||
;; So for each lane we will compute:
|
||||
;;
|
||||
;; A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
|
||||
;;
|
||||
;; Note, the algorithm will use `pmuludq` which operates directly on the lower
|
||||
;; 32-bit (`Al` or `Bl`) of a lane and writes the result to the full 64-bits of
|
||||
;; the lane of the destination. For this reason we don't need shifts to isolate
|
||||
;; the lower 32-bits, however, we will need to use shifts to isolate the high
|
||||
;; 32-bits when doing calculations, i.e., `Ah == A >> 32`.
|
||||
;; Per 64-bit lane, with each operand split into 32-bit halves (`h`/`l`):
;;
;;     lhs * rhs = (lhs_l * rhs_l) + ((lhs_h * rhs_l) + (lhs_l * rhs_h)) << 32
(rule (lower (has_type (multi_lane 64 2)
                       (imul lhs rhs)))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (rhs_reg Reg (put_in_reg rhs))
            ;; lhs >> 32, moving each lane's high half into its low half.
            (lhs_high Reg (psrlq lhs_reg (RegMemImm.Imm 32)))
            ;; lhs_h * rhs_l
            (high_low Reg (pmuludq lhs_high (RegMem.Reg rhs_reg)))
            ;; rhs >> 32, moving each lane's high half into its low half.
            (rhs_high Reg (psrlq rhs_reg (RegMemImm.Imm 32)))
            ;; lhs_l * rhs_h
            (low_high Reg (pmuludq lhs_reg (RegMem.Reg rhs_high)))
            ;; Sum of the two cross products.
            (cross Reg (paddq high_low (RegMem.Reg low_high)))
            ;; Cross-product sum << 32.
            (cross_shifted Reg (psllq cross (RegMemImm.Imm 32)))
            ;; lhs_l * rhs_l
            (low_low Reg (pmuludq lhs_reg (RegMem.Reg rhs_reg))))
        ;; low_low + cross_shifted
        (value_reg (paddq low_low (RegMem.Reg cross_shifted)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_s`.
|
||||
;; `i16x8.extmul_high_i8x16_s`: for each operand, `palignr` the register with
;; itself by 8, widen with `pmovsxbw`, then multiply with `pmullw`.
(rule (lower (has_type (multi_lane 16 8)
                       (imul (def_inst (swiden_high (and (value_type (multi_lane 8 16))
                                                         lhs)))
                             (def_inst (swiden_high (and (value_type (multi_lane 8 16))
                                                         rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (lhs_aligned Reg (palignr lhs_reg (RegMem.Reg lhs_reg) 8 (OperandSize.Size32)))
            (lhs_wide Reg (pmovsxbw (RegMem.Reg lhs_aligned)))
            (rhs_reg Reg (put_in_reg rhs))
            (rhs_aligned Reg (palignr rhs_reg (RegMem.Reg rhs_reg) 8 (OperandSize.Size32)))
            (rhs_wide Reg (pmovsxbw (RegMem.Reg rhs_aligned))))
        (value_reg (pmullw lhs_wide (RegMem.Reg rhs_wide)))))

;; `i32x4.extmul_high_i16x8_s`: compute `pmullw` and `pmulhw` of the operands
;; and combine the two results with `punpckhwd`.
(rule (lower (has_type (multi_lane 32 4)
                       (imul (def_inst (swiden_high (and (value_type (multi_lane 16 8))
                                                         lhs)))
                             (def_inst (swiden_high (and (value_type (multi_lane 16 8))
                                                         rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (rhs_reg Reg (put_in_reg rhs))
            (prod_lo Reg (pmullw lhs_reg (RegMem.Reg rhs_reg)))
            (prod_hi Reg (pmulhw lhs_reg (RegMem.Reg rhs_reg))))
        (value_reg (punpckhwd prod_lo (RegMem.Reg prod_hi)))))

;; `i64x2.extmul_high_i32x4_s`: shuffle each operand with `pshufd` (immediate
;; `0xFA`), then multiply with `pmuldq`.
(rule (lower (has_type (multi_lane 64 2)
                       (imul (def_inst (swiden_high (and (value_type (multi_lane 32 4))
                                                         lhs)))
                             (def_inst (swiden_high (and (value_type (multi_lane 32 4))
                                                         rhs))))))
      (let ((lhs_shuf Reg (pshufd (put_in_reg_mem lhs)
                                  0xFA
                                  (OperandSize.Size32)))
            (rhs_shuf Reg (pshufd (put_in_reg_mem rhs)
                                  0xFA
                                  (OperandSize.Size32))))
        (value_reg (pmuldq lhs_shuf (RegMem.Reg rhs_shuf)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
||||
;; `i16x8.extmul_low_i8x16_s`: widen each operand with `pmovsxbw`, then
;; multiply with `pmullw`.
(rule (lower (has_type (multi_lane 16 8)
                       (imul (def_inst (swiden_low (and (value_type (multi_lane 8 16))
                                                        lhs)))
                             (def_inst (swiden_low (and (value_type (multi_lane 8 16))
                                                        rhs))))))
      (let ((lhs_wide Reg (pmovsxbw (put_in_reg_mem lhs)))
            (rhs_wide Reg (pmovsxbw (put_in_reg_mem rhs))))
        (value_reg (pmullw lhs_wide (RegMem.Reg rhs_wide)))))

;; `i32x4.extmul_low_i16x8_s`: compute `pmullw` and `pmulhw` of the operands
;; and combine the two results with `punpcklwd`.
(rule (lower (has_type (multi_lane 32 4)
                       (imul (def_inst (swiden_low (and (value_type (multi_lane 16 8))
                                                        lhs)))
                             (def_inst (swiden_low (and (value_type (multi_lane 16 8))
                                                        rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (rhs_reg Reg (put_in_reg rhs))
            (prod_lo Reg (pmullw lhs_reg (RegMem.Reg rhs_reg)))
            (prod_hi Reg (pmulhw lhs_reg (RegMem.Reg rhs_reg))))
        (value_reg (punpcklwd prod_lo (RegMem.Reg prod_hi)))))

;; `i64x2.extmul_low_i32x4_s`: shuffle each operand with `pshufd` (immediate
;; `0x50`), then multiply with `pmuldq`.
(rule (lower (has_type (multi_lane 64 2)
                       (imul (def_inst (swiden_low (and (value_type (multi_lane 32 4))
                                                        lhs)))
                             (def_inst (swiden_low (and (value_type (multi_lane 32 4))
                                                        rhs))))))
      (let ((lhs_shuf Reg (pshufd (put_in_reg_mem lhs)
                                  0x50
                                  (OperandSize.Size32)))
            (rhs_shuf Reg (pshufd (put_in_reg_mem rhs)
                                  0x50
                                  (OperandSize.Size32))))
        (value_reg (pmuldq lhs_shuf (RegMem.Reg rhs_shuf)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
||||
;; `i16x8.extmul_high_i8x16_u`: for each operand, `palignr` the register with
;; itself by 8, widen with `pmovzxbw`, then multiply with `pmullw`.
(rule (lower (has_type (multi_lane 16 8)
                       (imul (def_inst (uwiden_high (and (value_type (multi_lane 8 16))
                                                         lhs)))
                             (def_inst (uwiden_high (and (value_type (multi_lane 8 16))
                                                         rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (lhs_aligned Reg (palignr lhs_reg (RegMem.Reg lhs_reg) 8 (OperandSize.Size32)))
            (lhs_wide Reg (pmovzxbw (RegMem.Reg lhs_aligned)))
            (rhs_reg Reg (put_in_reg rhs))
            (rhs_aligned Reg (palignr rhs_reg (RegMem.Reg rhs_reg) 8 (OperandSize.Size32)))
            (rhs_wide Reg (pmovzxbw (RegMem.Reg rhs_aligned))))
        (value_reg (pmullw lhs_wide (RegMem.Reg rhs_wide)))))

;; `i32x4.extmul_high_i16x8_u`: compute `pmullw` and `pmulhuw` of the operands
;; and combine the two results with `punpckhwd`.
(rule (lower (has_type (multi_lane 32 4)
                       (imul (def_inst (uwiden_high (and (value_type (multi_lane 16 8))
                                                         lhs)))
                             (def_inst (uwiden_high (and (value_type (multi_lane 16 8))
                                                         rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (rhs_reg Reg (put_in_reg rhs))
            (prod_lo Reg (pmullw lhs_reg (RegMem.Reg rhs_reg)))
            (prod_hi Reg (pmulhuw lhs_reg (RegMem.Reg rhs_reg))))
        (value_reg (punpckhwd prod_lo (RegMem.Reg prod_hi)))))

;; `i64x2.extmul_high_i32x4_u`: shuffle each operand with `pshufd` (immediate
;; `0xFA`), then multiply with `pmuludq`.
(rule (lower (has_type (multi_lane 64 2)
                       (imul (def_inst (uwiden_high (and (value_type (multi_lane 32 4))
                                                         lhs)))
                             (def_inst (uwiden_high (and (value_type (multi_lane 32 4))
                                                         rhs))))))
      (let ((lhs_shuf Reg (pshufd (put_in_reg_mem lhs)
                                  0xFA
                                  (OperandSize.Size32)))
            (rhs_shuf Reg (pshufd (put_in_reg_mem rhs)
                                  0xFA
                                  (OperandSize.Size32))))
        (value_reg (pmuludq lhs_shuf (RegMem.Reg rhs_shuf)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
||||
;; `i16x8.extmul_low_i8x16_u`: widen each operand with `pmovzxbw`, then
;; multiply with `pmullw`.
(rule (lower (has_type (multi_lane 16 8)
                       (imul (def_inst (uwiden_low (and (value_type (multi_lane 8 16))
                                                        lhs)))
                             (def_inst (uwiden_low (and (value_type (multi_lane 8 16))
                                                        rhs))))))
      (let ((lhs_wide Reg (pmovzxbw (put_in_reg_mem lhs)))
            (rhs_wide Reg (pmovzxbw (put_in_reg_mem rhs))))
        (value_reg (pmullw lhs_wide (RegMem.Reg rhs_wide)))))

;; `i32x4.extmul_low_i16x8_u`: compute `pmullw` and `pmulhuw` of the operands
;; and combine the two results with `punpcklwd`.
(rule (lower (has_type (multi_lane 32 4)
                       (imul (def_inst (uwiden_low (and (value_type (multi_lane 16 8))
                                                        lhs)))
                             (def_inst (uwiden_low (and (value_type (multi_lane 16 8))
                                                        rhs))))))
      (let ((lhs_reg Reg (put_in_reg lhs))
            (rhs_reg Reg (put_in_reg rhs))
            (prod_lo Reg (pmullw lhs_reg (RegMem.Reg rhs_reg)))
            (prod_hi Reg (pmulhuw lhs_reg (RegMem.Reg rhs_reg))))
        (value_reg (punpcklwd prod_lo (RegMem.Reg prod_hi)))))

;; `i64x2.extmul_low_i32x4_u`: shuffle each operand with `pshufd` (immediate
;; `0x50`), then multiply with `pmuludq`.
(rule (lower (has_type (multi_lane 64 2)
                       (imul (def_inst (uwiden_low (and (value_type (multi_lane 32 4))
                                                        lhs)))
                             (def_inst (uwiden_low (and (value_type (multi_lane 32 4))
                                                        rhs))))))
      (let ((lhs_shuf Reg (pshufd (put_in_reg_mem lhs)
                                  0x50
                                  (OperandSize.Size32)))
            (rhs_shuf Reg (pshufd (put_in_reg_mem rhs)
                                  0x50
                                  (OperandSize.Size32))))
        (value_reg (pmuludq lhs_shuf (RegMem.Reg rhs_shuf)))))
|
||||
|
||||
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Note the flipping of operands below. CLIF specifies
|
||||
;;
|
||||
;; band_not(x, y) = and(x, not(y))
|
||||
;;
|
||||
;; while x86 does
|
||||
;;
|
||||
;; pandn(x, y) = and(not(x), y)
|
||||
|
||||
;; In all three rules the CLIF operands are passed in swapped order: the
;; second CLIF operand (the one being negated) goes in a register as the
;; first argument, and the first CLIF operand becomes the reg-or-mem second
;; argument.

;; `f32x4`: `andnps`.
(rule (lower (has_type $F32X4 (band_not kept negated)))
      (value_reg (andnps (put_in_reg negated) (put_in_reg_mem kept))))

;; `f64x2`: `andnpd`.
(rule (lower (has_type $F64X2 (band_not kept negated)))
      (value_reg (andnpd (put_in_reg negated) (put_in_reg_mem kept))))

;; Any other multi-lane type: `pandn`.
(rule (lower (has_type (multi_lane _bits _lanes) (band_not kept negated)))
      (value_reg (pandn (put_in_reg negated) (put_in_reg_mem kept))))
|
||||
@@ -1,5 +1,8 @@
|
||||
//! Lowering rules for X64.
|
||||
|
||||
// ISLE integration glue.
|
||||
mod isle;
|
||||
|
||||
use crate::data_value::DataValue;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, FloatCC, IntCC},
|
||||
@@ -1497,20 +1500,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
None
|
||||
};
|
||||
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
|
||||
let value = ctx
|
||||
.get_constant(insn)
|
||||
.expect("constant value for iconst et al");
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
for inst in Inst::gen_constant(dst, value as u128, ty.unwrap(), |ty| {
|
||||
ctx.alloc_tmp(ty).only_reg().unwrap()
|
||||
}) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
if let Ok(()) = isle::lower(ctx, isa_flags, &outputs, insn) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Opcode::Iadd
|
||||
match op {
|
||||
Opcode::Iconst
|
||||
| Opcode::Bconst
|
||||
| Opcode::Null
|
||||
| Opcode::Iadd
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::SaddSat
|
||||
| Opcode::UaddSat
|
||||
@@ -1520,755 +1518,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::AvgRound
|
||||
| Opcode::Band
|
||||
| Opcode::Bor
|
||||
| Opcode::Bxor => {
|
||||
let ty = ty.unwrap();
|
||||
if ty.lane_count() > 1 {
|
||||
let sse_op = match op {
|
||||
Opcode::Iadd => match ty {
|
||||
types::I8X16 => SseOpcode::Paddb,
|
||||
types::I16X8 => SseOpcode::Paddw,
|
||||
types::I32X4 => SseOpcode::Paddd,
|
||||
types::I64X2 => SseOpcode::Paddq,
|
||||
_ => panic!("Unsupported type for packed iadd instruction: {}", ty),
|
||||
},
|
||||
Opcode::SaddSat => match ty {
|
||||
types::I8X16 => SseOpcode::Paddsb,
|
||||
types::I16X8 => SseOpcode::Paddsw,
|
||||
_ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::UaddSat => match ty {
|
||||
types::I8X16 => SseOpcode::Paddusb,
|
||||
types::I16X8 => SseOpcode::Paddusw,
|
||||
_ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::Isub => match ty {
|
||||
types::I8X16 => SseOpcode::Psubb,
|
||||
types::I16X8 => SseOpcode::Psubw,
|
||||
types::I32X4 => SseOpcode::Psubd,
|
||||
types::I64X2 => SseOpcode::Psubq,
|
||||
_ => panic!("Unsupported type for packed isub instruction: {}", ty),
|
||||
},
|
||||
Opcode::SsubSat => match ty {
|
||||
types::I8X16 => SseOpcode::Psubsb,
|
||||
types::I16X8 => SseOpcode::Psubsw,
|
||||
_ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::UsubSat => match ty {
|
||||
types::I8X16 => SseOpcode::Psubusb,
|
||||
types::I16X8 => SseOpcode::Psubusw,
|
||||
_ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::AvgRound => match ty {
|
||||
types::I8X16 => SseOpcode::Pavgb,
|
||||
types::I16X8 => SseOpcode::Pavgw,
|
||||
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
|
||||
},
|
||||
Opcode::Band => match ty {
|
||||
types::F32X4 => SseOpcode::Andps,
|
||||
types::F64X2 => SseOpcode::Andpd,
|
||||
_ => SseOpcode::Pand,
|
||||
},
|
||||
Opcode::Bor => match ty {
|
||||
types::F32X4 => SseOpcode::Orps,
|
||||
types::F64X2 => SseOpcode::Orpd,
|
||||
_ => SseOpcode::Por,
|
||||
},
|
||||
Opcode::Bxor => match ty {
|
||||
types::F32X4 => SseOpcode::Xorps,
|
||||
types::F64X2 => SseOpcode::Xorpd,
|
||||
_ => SseOpcode::Pxor,
|
||||
},
|
||||
_ => panic!("Unsupported packed instruction: {}", op),
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
// Move the `lhs` to the same register as `dst`.
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
|
||||
} else if ty == types::I128 || ty == types::B128 {
|
||||
let alu_ops = match op {
|
||||
Opcode::Iadd => (AluRmiROpcode::Add, AluRmiROpcode::Adc),
|
||||
Opcode::Isub => (AluRmiROpcode::Sub, AluRmiROpcode::Sbb),
|
||||
Opcode::Band => (AluRmiROpcode::And, AluRmiROpcode::And),
|
||||
Opcode::Bor => (AluRmiROpcode::Or, AluRmiROpcode::Or),
|
||||
Opcode::Bxor => (AluRmiROpcode::Xor, AluRmiROpcode::Xor),
|
||||
_ => panic!("Unsupported opcode with 128-bit integers: {:?}", op),
|
||||
};
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
assert_eq!(lhs.len(), 2);
|
||||
assert_eq!(rhs.len(), 2);
|
||||
assert_eq!(dst.len(), 2);
|
||||
|
||||
// For add, sub, and, or, xor: just do ops on lower then upper
|
||||
// half. Carry-flag propagation is implicit (add/adc, sub/sbb).
|
||||
ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64));
|
||||
ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[1], types::I64));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
alu_ops.0,
|
||||
RegMemImm::reg(rhs.regs()[0]),
|
||||
dst.regs()[0],
|
||||
));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
alu_ops.1,
|
||||
RegMemImm::reg(rhs.regs()[1]),
|
||||
dst.regs()[1],
|
||||
));
|
||||
} else {
|
||||
let size = if ty == types::I64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
};
|
||||
let alu_op = match op {
|
||||
Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add,
|
||||
Opcode::Isub => AluRmiROpcode::Sub,
|
||||
Opcode::Band => AluRmiROpcode::And,
|
||||
Opcode::Bor => AluRmiROpcode::Or,
|
||||
Opcode::Bxor => AluRmiROpcode::Xor,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let (lhs, rhs) = match op {
|
||||
Opcode::Iadd
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::Band
|
||||
| Opcode::Bor
|
||||
| Opcode::Bxor => {
|
||||
// For commutative operations, try to commute operands if one is an
|
||||
// immediate or direct memory reference. Do so by converting LHS to RMI; if
|
||||
// reg, then always convert RHS to RMI; else, use LHS as RMI and convert
|
||||
// RHS to reg.
|
||||
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
|
||||
if let RegMemImm::Reg { reg: lhs_reg } = lhs {
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
(lhs_reg, rhs)
|
||||
} else {
|
||||
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
|
||||
(rhs_reg, lhs)
|
||||
}
|
||||
}
|
||||
Opcode::Isub => (
|
||||
put_input_in_reg(ctx, inputs[0]),
|
||||
input_to_reg_mem_imm(ctx, inputs[1]),
|
||||
),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
|
||||
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Imul => {
|
||||
let ty = ty.unwrap();
|
||||
|
||||
// Check for ext_mul_* instructions which are being shared here under imul. We must
|
||||
// check first for operands that are opcodes since checking for types is not enough.
|
||||
if let Some(_) = matches_input_any(
|
||||
ctx,
|
||||
inputs[0],
|
||||
&[
|
||||
Opcode::SwidenHigh,
|
||||
Opcode::SwidenLow,
|
||||
Opcode::UwidenHigh,
|
||||
Opcode::UwidenLow,
|
||||
],
|
||||
) {
|
||||
// Optimized ext_mul_* lowerings are based on optimized lowerings
|
||||
// here: https://github.com/WebAssembly/simd/pull/376
|
||||
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
|
||||
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_s
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
));
|
||||
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note swiden_high only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) {
|
||||
if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note swiden_low only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh)
|
||||
{
|
||||
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(uwiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_u
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note uwiden_high only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_high_unsigned type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
|
||||
if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
|
||||
let input0_ty = ctx.input_ty(uwiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note uwiden_low only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_unsigned type"),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
panic!("Unsupported imul operation for type: {}", ty);
|
||||
}
|
||||
} else if ty == types::I64X2 {
|
||||
// Eventually one of these should be `input_to_reg_mem` (TODO).
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512dq_simd() {
|
||||
// With the right AVX512 features (VL + DQ) this operation
|
||||
// can lower to a single operation.
|
||||
ctx.emit(Inst::xmm_rm_r_evex(
|
||||
Avx512Opcode::Vpmullq,
|
||||
RegMem::reg(rhs),
|
||||
lhs,
|
||||
dst,
|
||||
));
|
||||
} else {
|
||||
// Otherwise, for I64X2 multiplication we describe a lane A as being
|
||||
// composed of a 32-bit upper half "Ah" and a 32-bit lower half
|
||||
// "Al". The 32-bit long hand multiplication can then be written
|
||||
// as:
|
||||
// Ah Al
|
||||
// * Bh Bl
|
||||
// -----
|
||||
// Al * Bl
|
||||
// + (Ah * Bl) << 32
|
||||
// + (Al * Bh) << 32
|
||||
//
|
||||
// So for each lane we will compute:
|
||||
// A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
|
||||
//
|
||||
// Note, the algorithm will use pmuludq which operates directly
|
||||
// on the lower 32-bit (Al or Bl) of a lane and writes the
|
||||
// result to the full 64-bits of the lane of the destination.
|
||||
// For this reason we don't need shifts to isolate the lower
|
||||
// 32-bits, however, we will need to use shifts to isolate the
|
||||
// high 32-bits when doing calculations, i.e., Ah == A >> 32.
|
||||
//
|
||||
// The full sequence then is as follows:
|
||||
// A' = A
|
||||
// A' = A' >> 32
|
||||
// A' = Ah' * Bl
|
||||
// B' = B
|
||||
// B' = B' >> 32
|
||||
// B' = Bh' * Al
|
||||
// B' = B' + A'
|
||||
// B' = B' << 32
|
||||
// A' = A
|
||||
// A' = Al' * Bl
|
||||
// A' = A' + B'
|
||||
// dst = A'
|
||||
|
||||
// A' = A
|
||||
let rhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
|
||||
|
||||
// A' = A' >> 32
|
||||
// A' = Ah' * Bl
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psrlq,
|
||||
RegMemImm::imm(32),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(lhs.clone()),
|
||||
rhs_1,
|
||||
));
|
||||
|
||||
// B' = B
|
||||
let lhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(lhs_1, lhs, ty));
|
||||
|
||||
// B' = B' >> 32
|
||||
// B' = Bh' * Al
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psrlq,
|
||||
RegMemImm::imm(32),
|
||||
lhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1));
|
||||
|
||||
// B' = B' + A'
|
||||
// B' = B' << 32
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddq,
|
||||
RegMem::reg(rhs_1.to_reg()),
|
||||
lhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psllq,
|
||||
RegMemImm::imm(32),
|
||||
lhs_1,
|
||||
));
|
||||
|
||||
// A' = A
|
||||
// A' = Al' * Bl
|
||||
// A' = A' + B'
|
||||
// dst = A'
|
||||
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(lhs.clone()),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddq,
|
||||
RegMem::reg(lhs_1.to_reg()),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
|
||||
}
|
||||
} else if ty.lane_count() > 1 {
|
||||
// Emit single instruction lowerings for the remaining vector
|
||||
// multiplications.
|
||||
let sse_op = match ty {
|
||||
types::I16X8 => SseOpcode::Pmullw,
|
||||
types::I32X4 => SseOpcode::Pmulld,
|
||||
_ => panic!("Unsupported type for packed imul instruction: {}", ty),
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
// Move the `lhs` to the same register as `dst`.
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
|
||||
} else if ty == types::I128 || ty == types::B128 {
|
||||
// Handle 128-bit multiplications.
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
assert_eq!(lhs.len(), 2);
|
||||
assert_eq!(rhs.len(), 2);
|
||||
assert_eq!(dst.len(), 2);
|
||||
|
||||
// mul:
|
||||
// dst_lo = lhs_lo * rhs_lo
|
||||
// dst_hi = umulhi(lhs_lo, rhs_lo) + lhs_lo * rhs_hi + lhs_hi * rhs_lo
|
||||
//
|
||||
// so we emit:
|
||||
// mov dst_lo, lhs_lo
|
||||
// mul dst_lo, rhs_lo
|
||||
// mov dst_hi, lhs_lo
|
||||
// mul dst_hi, rhs_hi
|
||||
// mov tmp, lhs_hi
|
||||
// mul tmp, rhs_lo
|
||||
// add dst_hi, tmp
|
||||
// mov rax, lhs_lo
|
||||
// umulhi rhs_lo // implicit rax arg/dst
|
||||
// add dst_hi, rdx
|
||||
let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Mul,
|
||||
RegMemImm::reg(rhs.regs()[0]),
|
||||
dst.regs()[0],
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[0], types::I64));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Mul,
|
||||
RegMemImm::reg(rhs.regs()[1]),
|
||||
dst.regs()[1],
|
||||
));
|
||||
ctx.emit(Inst::gen_move(tmp, lhs.regs()[1], types::I64));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Mul,
|
||||
RegMemImm::reg(rhs.regs()[0]),
|
||||
tmp,
|
||||
));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::reg(tmp.to_reg()),
|
||||
dst.regs()[1],
|
||||
));
|
||||
ctx.emit(Inst::gen_move(
|
||||
Writable::from_reg(regs::rax()),
|
||||
lhs.regs()[0],
|
||||
types::I64,
|
||||
));
|
||||
ctx.emit(Inst::mul_hi(
|
||||
OperandSize::Size64,
|
||||
/* signed = */ false,
|
||||
RegMem::reg(rhs.regs()[0]),
|
||||
));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::reg(regs::rdx()),
|
||||
dst.regs()[1],
|
||||
));
|
||||
} else {
|
||||
let size = if ty == types::I64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
};
|
||||
let alu_op = AluRmiROpcode::Mul;
|
||||
|
||||
// For commutative operations, try to commute operands if one is
|
||||
// an immediate or direct memory reference. Do so by converting
|
||||
// LHS to RMI; if reg, then always convert RHS to RMI; else, use
|
||||
// LHS as RMI and convert RHS to reg.
|
||||
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
|
||||
let (lhs, rhs) = if let RegMemImm::Reg { reg: lhs_reg } = lhs {
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
(lhs_reg, rhs)
|
||||
} else {
|
||||
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
|
||||
(rhs_reg, lhs)
|
||||
};
|
||||
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
|
||||
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::BandNot => {
|
||||
let ty = ty.unwrap();
|
||||
debug_assert!(ty.is_vector() && ty.bytes() == 16);
|
||||
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let sse_op = match ty {
|
||||
types::F32X4 => SseOpcode::Andnps,
|
||||
types::F64X2 => SseOpcode::Andnpd,
|
||||
_ => SseOpcode::Pandn,
|
||||
};
|
||||
// Note the flipping of operands: the `rhs` operand is used as the destination instead
|
||||
// of the `lhs` as in the other bit operations above (e.g. `band`).
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
|
||||
| Opcode::Bxor
|
||||
| Opcode::Imul
|
||||
| Opcode::BandNot => {
|
||||
unreachable!(
|
||||
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||
ctx.dfg().display_inst(insn),
|
||||
ty
|
||||
);
|
||||
}
|
||||
|
||||
Opcode::Iabs => {
|
||||
@@ -5801,7 +5058,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
// Now the AtomicRmwSeq (pseudo-) instruction itself
|
||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||
ctx.emit(Inst::AtomicRmwSeq { ty: ty_access, op });
|
||||
ctx.emit(Inst::AtomicRmwSeq {
|
||||
ty: ty_access,
|
||||
op,
|
||||
address: regs::r9(),
|
||||
operand: regs::r10(),
|
||||
temp: Writable::from_reg(regs::r11()),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
});
|
||||
|
||||
// And finally, copy the preordained AtomicRmwSeq output reg to its destination.
|
||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
||||
@@ -5827,8 +5091,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
));
|
||||
ctx.emit(Inst::LockCmpxchg {
|
||||
ty: ty_access,
|
||||
src: replacement,
|
||||
dst: addr.into(),
|
||||
mem: addr.into(),
|
||||
replacement,
|
||||
expected: regs::rax(),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
});
|
||||
// And finally, copy the old value at the location to its destination reg.
|
||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
||||
|
||||
414
cranelift/codegen/src/isa/x64/lower/isle.rs
Normal file
414
cranelift/codegen/src/isa/x64/lower/isle.rs
Normal file
@@ -0,0 +1,414 @@
|
||||
//! ISLE integration glue code for x64 lowering.
|
||||
|
||||
// Pull in the ISLE generated code.
|
||||
mod generated_code;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{
|
||||
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
|
||||
Writable,
|
||||
};
|
||||
use crate::isa::x64::inst::args::SyntheticAmode;
|
||||
use crate::isa::x64::settings as x64_settings;
|
||||
use crate::{
|
||||
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
|
||||
isa::x64::inst::{
|
||||
args::{Avx512Opcode, CmpOpcode, ExtMode, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC},
|
||||
x64_map_regs, RegMapper,
|
||||
},
|
||||
machinst::{get_output_reg, InsnInput, InsnOutput, LowerCtx},
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
// Short names for types used by this glue code and by the generated ISLE
// code (which pulls them in through `use super::*`).

/// The unit type; the return type of `emit` below.
type Unit = ();
|
||||
/// A borrowed slice of IR values.
type ValueSlice<'a> = &'a [Value];
|
||||
/// Exactly two IR values.
type ValueArray2 = [Value; 2];
|
||||
/// Exactly three IR values.
type ValueArray3 = [Value; 3];
|
||||
/// A register wrapped in `Writable`.
type WritableReg = Writable<Reg>;
|
||||
/// The `machinst` `ValueRegs` container specialised to `Reg`.
type ValueRegs = crate::machinst::ValueRegs<Reg>;
|
||||
|
||||
/// A load that `is_mergeable_load` accepted and that has not been sunk yet.
///
/// Produced by `sinkable_load`; `sink_load` later sinks `inst` and turns
/// `addr_input` + `offset` into an addressing mode.
pub struct SinkableLoad {
|
||||
// The load instruction itself (passed to `sink_inst`).
inst: Inst,
|
||||
// The address input reported by `is_mergeable_load`.
addr_input: InsnInput,
|
||||
// The offset reported by `is_mergeable_load`.
offset: i32,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct RegRenamer {
|
||||
// Map of `(old, new)` register names. Use a `SmallVec` because we typically
|
||||
// only have one or two renamings.
|
||||
renames: SmallVec<[(Reg, Reg); 2]>,
|
||||
}
|
||||
|
||||
impl RegRenamer {
|
||||
fn add_rename(&mut self, old: Reg, new: Reg) {
|
||||
self.renames.push((old, new));
|
||||
}
|
||||
|
||||
fn get_rename(&self, reg: Reg) -> Option<Reg> {
|
||||
self.renames
|
||||
.iter()
|
||||
.find(|(old, _)| reg == *old)
|
||||
.map(|(_, new)| *new)
|
||||
}
|
||||
}
|
||||
|
||||
impl RegMapper for RegRenamer {
|
||||
fn get_use(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
|
||||
fn get_def(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
|
||||
fn get_mod(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
}
|
||||
|
||||
/// The main entry point for lowering with ISLE.
|
||||
pub(crate) fn lower<C>(
|
||||
lower_ctx: &mut C,
|
||||
isa_flags: &x64_settings::Flags,
|
||||
outputs: &[InsnOutput],
|
||||
inst: Inst,
|
||||
) -> Result<(), ()>
|
||||
where
|
||||
C: LowerCtx<I = MInst>,
|
||||
{
|
||||
// TODO: reuse the ISLE context across lowerings so we can reuse its
|
||||
// internal heap allocations.
|
||||
let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);
|
||||
|
||||
let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
|
||||
let mut temp_regs = temp_regs.regs().iter();
|
||||
|
||||
// The ISLE generated code emits its own registers to define the
|
||||
// instruction's lowered values in. We rename those registers to the
|
||||
// registers they were assigned when their value was used as an operand in
|
||||
// earlier lowerings.
|
||||
let mut renamer = RegRenamer::default();
|
||||
for output in outputs {
|
||||
let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
|
||||
for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
|
||||
renamer.add_rename(*temp, dst.to_reg());
|
||||
}
|
||||
}
|
||||
|
||||
for mut inst in isle_ctx.into_emitted_insts() {
|
||||
x64_map_regs(&mut inst, &renamer);
|
||||
lower_ctx.emit(inst);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub struct IsleContext<'a, C> {
|
||||
lower_ctx: &'a mut C,
|
||||
isa_flags: &'a x64_settings::Flags,
|
||||
emitted_insts: SmallVec<[MInst; 6]>,
|
||||
}
|
||||
|
||||
impl<'a, C> IsleContext<'a, C> {
|
||||
pub fn new(lower_ctx: &'a mut C, isa_flags: &'a x64_settings::Flags) -> Self {
|
||||
IsleContext {
|
||||
lower_ctx,
|
||||
isa_flags,
|
||||
emitted_insts: SmallVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
|
||||
self.emitted_insts
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C> generated_code::Context for IsleContext<'a, C>
|
||||
where
|
||||
C: LowerCtx<I = MInst>,
|
||||
{
|
||||
#[inline]
|
||||
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
|
||||
let [a, b] = *arr;
|
||||
(a, b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 {
|
||||
[a, b]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) {
|
||||
let [a, b, c] = *arr;
|
||||
(a, b, c)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 {
|
||||
[a, b, c]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_reg(&mut self, reg: Reg) -> ValueRegs {
|
||||
ValueRegs::one(reg)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_regs(&mut self, r1: Reg, r2: Reg) -> ValueRegs {
|
||||
ValueRegs::two(r1, r2)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn temp_writable_reg(&mut self, ty: Type) -> WritableReg {
|
||||
let value_regs = self.lower_ctx.alloc_tmp(ty);
|
||||
value_regs.only_reg().unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn invalid_reg(&mut self) -> Reg {
|
||||
Reg::invalid()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn put_in_reg(&mut self, val: Value) -> Reg {
|
||||
self.lower_ctx.put_value_in_regs(val).only_reg().unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn put_in_regs(&mut self, val: Value) -> ValueRegs {
|
||||
self.lower_ctx.put_value_in_regs(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_regs_get(&mut self, regs: ValueRegs, i: usize) -> Reg {
|
||||
regs.regs()[i]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u8_as_u64(&mut self, x: u8) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u16_as_u64(&mut self, x: u16) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_as_u64(&mut self, x: u32) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ty_bits(&mut self, ty: Type) -> u16 {
|
||||
ty.bits()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() <= 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_list_slice(&mut self, list: ValueList) -> ValueSlice {
|
||||
list.as_slice(&self.lower_ctx.dfg().value_lists)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unwrap_head_value_list_1(&mut self, list: ValueList) -> (Value, ValueSlice) {
|
||||
match self.value_list_slice(list) {
|
||||
[head, tail @ ..] => (*head, tail),
|
||||
_ => out_of_line_panic("`unwrap_head_value_list_1` on empty `ValueList`"),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unwrap_head_value_list_2(&mut self, list: ValueList) -> (Value, Value, ValueSlice) {
|
||||
match self.value_list_slice(list) {
|
||||
[head1, head2, tail @ ..] => (*head1, *head2, tail),
|
||||
_ => out_of_line_panic(
|
||||
"`unwrap_head_value_list_2` on list without at least two elements",
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_reg_to_reg(&mut self, r: WritableReg) -> Reg {
|
||||
r.to_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
|
||||
imm.bits() as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn inst_results(&mut self, inst: Inst) -> ValueSlice {
|
||||
self.lower_ctx.dfg().inst_results(inst)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn first_result(&mut self, inst: Inst) -> Option<Value> {
|
||||
self.lower_ctx.dfg().inst_results(inst).first().copied()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn inst_data(&mut self, inst: Inst) -> InstructionData {
|
||||
self.lower_ctx.dfg()[inst].clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn value_type(&mut self, val: Value) -> Type {
|
||||
self.lower_ctx.dfg().value_type(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn multi_lane(&mut self, ty: Type) -> Option<(u8, u16)> {
|
||||
if ty.lane_count() > 1 {
|
||||
Some((ty.lane_bits(), ty.lane_count()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn def_inst(&mut self, val: Value) -> Option<Inst> {
|
||||
self.lower_ctx.dfg().value_def(val).inst()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn operand_size_of_type(&mut self, ty: Type) -> OperandSize {
|
||||
if ty.bits() == 64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
}
|
||||
}
|
||||
|
||||
fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
|
||||
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
|
||||
if let Some(c) = inputs.constant {
|
||||
// Generate constants fresh at each use to minimize long-range
|
||||
// register pressure.
|
||||
let ty = self.value_type(val);
|
||||
return RegMem::reg(generated_code::constructor_imm(self, ty, c).unwrap());
|
||||
}
|
||||
|
||||
if let Some((src_insn, 0)) = inputs.inst {
|
||||
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) {
|
||||
self.lower_ctx.sink_inst(src_insn);
|
||||
let amode = lower_to_amode(self.lower_ctx, addr_input, offset);
|
||||
return RegMem::mem(amode);
|
||||
}
|
||||
}
|
||||
|
||||
RegMem::reg(self.put_in_reg(val))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512vl_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512dq_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
|
||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||
let constant = self.lower_ctx.get_constant(inst)?;
|
||||
let imm = u8::try_from(constant).ok()?;
|
||||
Some(Imm8Reg::Imm8 { imm })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simm32_from_value(&mut self, val: Value) -> Option<RegMemImm> {
|
||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||
let constant: u64 = self.lower_ctx.get_constant(inst)?;
|
||||
let constant = constant as i64;
|
||||
to_simm32(constant)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<RegMemImm> {
|
||||
to_simm32(imm.bits())
|
||||
}
|
||||
|
||||
fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
|
||||
let input = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if let Some((inst, 0)) = input.inst {
|
||||
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, inst) {
|
||||
return Some(SinkableLoad {
|
||||
inst,
|
||||
addr_input,
|
||||
offset,
|
||||
});
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn sink_load(&mut self, load: &SinkableLoad) -> RegMemImm {
|
||||
self.lower_ctx.sink_inst(load.inst);
|
||||
let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
|
||||
RegMemImm::Mem {
|
||||
addr: SyntheticAmode::Real(addr),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
|
||||
ExtMode::new(from_bits, to_bits).unwrap()
|
||||
}
|
||||
|
||||
fn emit(&mut self, inst: &MInst) -> Unit {
|
||||
for inst in inst.clone().mov_mitosis() {
|
||||
self.emitted_insts.push(inst);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
|
||||
if x != 0 && x < u64::from(u32::MAX) {
|
||||
Some(x)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_simm32(constant: i64) -> Option<RegMemImm> {
|
||||
if constant == ((constant << 32) >> 32) {
|
||||
Some(RegMemImm::Imm {
|
||||
simm32: constant as u32,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Panic with `msg`.
///
/// Marked cold and never inlined so the panic machinery stays out of the
/// `#[inline]` callers; `#[track_caller]` attributes the panic location to
/// the call site.
#[cold]
#[inline(never)]
#[track_caller]
fn out_of_line_panic(msg: &str) -> ! {
    panic!("{}", msg)
}
|
||||
3639
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
Normal file
3639
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,13 +11,14 @@ use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::{
|
||||
ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
|
||||
InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
|
||||
ValueLabelAssignments, ValueLabelStart,
|
||||
ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function,
|
||||
GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
|
||||
ValueDef, ValueLabelAssignments, ValueLabelStart,
|
||||
};
|
||||
use crate::machinst::{
|
||||
writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode,
|
||||
VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, ValueRegs,
|
||||
non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
|
||||
LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
|
||||
VCodeInst, ValueRegs,
|
||||
};
|
||||
use crate::CodegenResult;
|
||||
use alloc::boxed::Box;
|
||||
@@ -61,6 +62,8 @@ pub trait LowerCtx {
|
||||
/// The instruction type for which this lowering framework is instantiated.
|
||||
type I: VCodeInst;
|
||||
|
||||
fn dfg(&self) -> &DataFlowGraph;
|
||||
|
||||
// Function-level queries:
|
||||
|
||||
/// Get the `ABICallee`.
|
||||
@@ -124,8 +127,12 @@ pub trait LowerCtx {
|
||||
/// instruction's result(s) must have *no* uses remaining, because it will
|
||||
/// not be codegen'd (it has been integrated into the current instruction).
|
||||
fn get_input_as_source_or_const(&self, ir_inst: Inst, idx: usize) -> NonRegInput;
|
||||
/// Like `get_input_as_source_or_const` but with a `Value`.
|
||||
fn get_value_as_source_or_const(&self, value: Value) -> NonRegInput;
|
||||
/// Put the `idx`th input into register(s) and return the assigned register.
|
||||
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg>;
|
||||
/// Put the given value into register(s) and return the assigned register.
|
||||
fn put_value_in_regs(&mut self, value: Value) -> ValueRegs<Reg>;
|
||||
/// Get the `idx`th output register(s) of the given IR instruction. When
|
||||
/// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
|
||||
/// the backend will write results to these output register(s). This
|
||||
@@ -1002,101 +1009,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
|
||||
Ok((vcode, stack_map_info))
|
||||
}
|
||||
|
||||
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
|
||||
log::trace!("put_value_in_reg: val {}", val);
|
||||
let mut regs = self.value_regs[val];
|
||||
log::trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
self.value_lowered_uses[val] += 1;
|
||||
|
||||
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
|
||||
// directly when we encounter a GetPinnedReg op, rather than lowering
|
||||
// the actual op, and do not return the source inst to the caller; the
|
||||
// value comes "out of the ether" and we will not force generation of
|
||||
// the superfluous move.
|
||||
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
|
||||
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
|
||||
if let Some(pr) = self.pinned_reg {
|
||||
regs = ValueRegs::one(pr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the actual inputs for a value. This is the implementation for
|
||||
/// `get_input()` but starting from the SSA value, which is not exposed to
|
||||
/// the backend.
|
||||
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
|
||||
log::trace!(
|
||||
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
|
||||
val,
|
||||
self.cur_inst,
|
||||
self.cur_scan_entry_color,
|
||||
);
|
||||
let inst = match self.f.dfg.value_def(val) {
|
||||
// OK to merge source instruction if (i) we have a source
|
||||
// instruction, and:
|
||||
// - It has no side-effects, OR
|
||||
// - It has a side-effect, has one output value, that one output has
|
||||
// only one use (this one), and the instruction's color is *one less
|
||||
// than* the current scan color.
|
||||
//
|
||||
// This latter set of conditions is testing whether a
|
||||
// side-effecting instruction can sink to the current scan
|
||||
// location; this is possible if the in-color of this inst is
|
||||
// equal to the out-color of the producing inst, so no other
|
||||
// side-effecting ops occur between them (which will only be true
|
||||
// if they are in the same BB, because color increments at each BB
|
||||
// start).
|
||||
//
|
||||
// If it is actually sunk, then in `merge_inst()`, we update the
|
||||
// scan color so that as we scan over the range past which the
|
||||
// instruction was sunk, we allow other instructions (that came
|
||||
// prior to the sunk instruction) to sink.
|
||||
ValueDef::Result(src_inst, result_idx) => {
|
||||
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
|
||||
log::trace!(" -> src inst {}", src_inst);
|
||||
log::trace!(" -> has lowering side effect: {}", src_side_effect);
|
||||
if !src_side_effect {
|
||||
// Pure instruction: always possible to sink.
|
||||
Some((src_inst, result_idx))
|
||||
} else {
|
||||
// Side-effect: test whether this is the only use of the
|
||||
// only result of the instruction, and whether colors allow
|
||||
// the code-motion.
|
||||
if self.cur_scan_entry_color.is_some()
|
||||
&& self.value_uses[val] == 1
|
||||
&& self.value_lowered_uses[val] == 0
|
||||
&& self.num_outputs(src_inst) == 1
|
||||
&& self
|
||||
.side_effect_inst_entry_colors
|
||||
.get(&src_inst)
|
||||
.unwrap()
|
||||
.get()
|
||||
+ 1
|
||||
== self.cur_scan_entry_color.unwrap().get()
|
||||
{
|
||||
Some((src_inst, 0))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
|
||||
|
||||
NonRegInput { inst, constant }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
||||
type I = I;
|
||||
|
||||
/// Borrow the data-flow graph of the function held in `self.f`.
fn dfg(&self) -> &DataFlowGraph {
    let function = &self.f;
    &function.dfg
}
|
||||
|
||||
/// Mutably borrow the ABI object, obtained from `self.vcode`.
fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
    let builder = &mut self.vcode;
    builder.abi()
}
|
||||
@@ -1207,12 +1128,124 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
||||
self.get_value_as_source_or_const(val)
|
||||
}
|
||||
|
||||
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
|
||||
log::trace!(
|
||||
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
|
||||
val,
|
||||
self.cur_inst,
|
||||
self.cur_scan_entry_color,
|
||||
);
|
||||
let inst = match self.f.dfg.value_def(val) {
|
||||
// OK to merge source instruction if (i) we have a source
|
||||
// instruction, and:
|
||||
// - It has no side-effects, OR
|
||||
// - It has a side-effect, has one output value, that one output has
|
||||
// only one use (this one), and the instruction's color is *one less
|
||||
// than* the current scan color.
|
||||
//
|
||||
// This latter set of conditions is testing whether a
|
||||
// side-effecting instruction can sink to the current scan
|
||||
// location; this is possible if the in-color of this inst is
|
||||
// equal to the out-color of the producing inst, so no other
|
||||
// side-effecting ops occur between them (which will only be true
|
||||
// if they are in the same BB, because color increments at each BB
|
||||
// start).
|
||||
//
|
||||
// If it is actually sunk, then in `merge_inst()`, we update the
|
||||
// scan color so that as we scan over the range past which the
|
||||
// instruction was sunk, we allow other instructions (that came
|
||||
// prior to the sunk instruction) to sink.
|
||||
ValueDef::Result(src_inst, result_idx) => {
|
||||
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
|
||||
log::trace!(" -> src inst {}", src_inst);
|
||||
log::trace!(" -> has lowering side effect: {}", src_side_effect);
|
||||
if !src_side_effect {
|
||||
// Pure instruction: always possible to sink.
|
||||
Some((src_inst, result_idx))
|
||||
} else {
|
||||
// Side-effect: test whether this is the only use of the
|
||||
// only result of the instruction, and whether colors allow
|
||||
// the code-motion.
|
||||
if self.cur_scan_entry_color.is_some()
|
||||
&& self.value_uses[val] == 1
|
||||
&& self.value_lowered_uses[val] == 0
|
||||
&& self.num_outputs(src_inst) == 1
|
||||
&& self
|
||||
.side_effect_inst_entry_colors
|
||||
.get(&src_inst)
|
||||
.unwrap()
|
||||
.get()
|
||||
+ 1
|
||||
== self.cur_scan_entry_color.unwrap().get()
|
||||
{
|
||||
Some((src_inst, 0))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
|
||||
|
||||
NonRegInput { inst, constant }
|
||||
}
|
||||
|
||||
/// Look up argument `idx` of `ir_inst`, resolve value aliases on it, and
/// delegate to `put_value_in_regs` for the resulting value.
///
/// Indexing panics if `idx` is out of range for the instruction's
/// argument list.
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
    let arg = self.f.dfg.inst_args(ir_inst)[idx];
    let resolved = self.f.dfg.resolve_aliases(arg);
    self.put_value_in_regs(resolved)
}
|
||||
|
||||
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
|
||||
let val = self.f.dfg.resolve_aliases(val);
|
||||
log::trace!("put_value_in_regs: val {}", val);
|
||||
|
||||
// If the value is a constant, then (re)materialize it at each use. This
|
||||
// lowers register pressure.
|
||||
if let Some(c) = self
|
||||
.f
|
||||
.dfg
|
||||
.value_def(val)
|
||||
.inst()
|
||||
.and_then(|inst| self.get_constant(inst))
|
||||
{
|
||||
let ty = self.f.dfg.value_type(val);
|
||||
|
||||
let regs = self.alloc_tmp(ty);
|
||||
log::trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
|
||||
self.alloc_tmp(ty).only_reg().unwrap()
|
||||
});
|
||||
for inst in insts {
|
||||
self.emit(inst);
|
||||
}
|
||||
return non_writable_value_regs(regs);
|
||||
}
|
||||
|
||||
let mut regs = self.value_regs[val];
|
||||
log::trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
self.value_lowered_uses[val] += 1;
|
||||
|
||||
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
|
||||
// directly when we encounter a GetPinnedReg op, rather than lowering
|
||||
// the actual op, and do not return the source inst to the caller; the
|
||||
// value comes "out of the ether" and we will not force generation of
|
||||
// the superfluous move.
|
||||
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
|
||||
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
|
||||
if let Some(pr) = self.pinned_reg {
|
||||
regs = ValueRegs::one(pr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
fn get_output(&self, ir_inst: Inst, idx: usize) -> ValueRegs<Writable<Reg>> {
|
||||
let val = self.f.dfg.inst_results(ir_inst)[idx];
|
||||
writable_value_regs(self.value_regs[val])
|
||||
|
||||
202
cranelift/codegen/src/prelude.isle
Normal file
202
cranelift/codegen/src/prelude.isle
Normal file
@@ -0,0 +1,202 @@
|
||||
;; This is a prelude of standard definitions for ISLE, the instruction-selector
|
||||
;; DSL, as we use it bound to our interfaces.
|
||||
|
||||
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `()`
|
||||
(type Unit (primitive Unit))
|
||||
|
||||
;; `bool` is declared in `clif.isle`.
|
||||
(extern const $true bool)
|
||||
(extern const $false bool)
|
||||
|
||||
(type u8 (primitive u8))
|
||||
(type u16 (primitive u16))
|
||||
(type u32 (primitive u32))
|
||||
(type u64 (primitive u64))
|
||||
(type u128 (primitive u128))
|
||||
(type usize (primitive usize))
|
||||
|
||||
(type i8 (primitive i8))
|
||||
(type i16 (primitive i16))
|
||||
(type i32 (primitive i32))
|
||||
(type i64 (primitive i64))
|
||||
(type i128 (primitive i128))
|
||||
(type isize (primitive isize))
|
||||
|
||||
;; `cranelift-entity`-based identifiers.
|
||||
(type Inst (primitive Inst))
|
||||
(type Type (primitive Type))
|
||||
(type Value (primitive Value))
|
||||
|
||||
;; ISLE representation of `&[Value]`.
|
||||
(type ValueSlice (primitive ValueSlice))
|
||||
|
||||
(type ValueList (primitive ValueList))
|
||||
(type ValueRegs (primitive ValueRegs))
|
||||
|
||||
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type Reg (primitive Reg))
|
||||
(type WritableReg (primitive WritableReg))
|
||||
|
||||
;; Construct a `ValueRegs` of one register.
|
||||
(decl value_reg (Reg) ValueRegs)
|
||||
(extern constructor value_reg value_reg)
|
||||
|
||||
;; Construct a `ValueRegs` of two registers.
|
||||
(decl value_regs (Reg Reg) ValueRegs)
|
||||
(extern constructor value_regs value_regs)
|
||||
|
||||
;; Get a temporary register for writing.
|
||||
(decl temp_writable_reg (Type) WritableReg)
|
||||
(extern constructor temp_writable_reg temp_writable_reg)
|
||||
|
||||
;; Get a temporary register for reading.
;;
;; Defined in ISLE itself rather than externally: obtains a writable temporary
;; of type `ty` via `temp_writable_reg` and converts it to a plain `Reg` with
;; `writable_reg_to_reg`.
|
||||
(decl temp_reg (Type) Reg)
|
||||
(rule (temp_reg ty)
|
||||
(writable_reg_to_reg (temp_writable_reg ty)))
|
||||
|
||||
;; Get the invalid register.
|
||||
(decl invalid_reg () Reg)
|
||||
(extern constructor invalid_reg invalid_reg)
|
||||
|
||||
;; Put the given value into a register.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` on x64 for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg (Value) Reg)
|
||||
(extern constructor put_in_reg put_in_reg)
|
||||
|
||||
;; Put the given value into one or more registers.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_regs (Value) ValueRegs)
|
||||
(extern constructor put_in_regs put_in_regs)
|
||||
|
||||
;; Get the `n`th register inside a `ValueRegs`.
|
||||
(decl value_regs_get (ValueRegs usize) Reg)
|
||||
(extern constructor value_regs_get value_regs_get)
|
||||
|
||||
;; Put the value into one or more registers and return the first register.
|
||||
;;
|
||||
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
|
||||
;; register. This is useful for things like an `i128` shift amount, where we mask
|
||||
;; the shift amount to the bit width of the value being shifted, and so the high
|
||||
;; half of the `i128` won't ever be used.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl lo_reg (Value) Reg)
|
||||
;; Put `val` into its register(s) with `put_in_regs`, then select the register
;; at index 0 with `value_regs_get`.
(rule (lo_reg val)
|
||||
(let ((regs ValueRegs (put_in_regs val)))
|
||||
(value_regs_get regs 0)))
|
||||
|
||||
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Convert a `u8` into a `u64`. Bound to the external constructor of the same
;; name; NOTE(review): presumably a plain widening conversion -- confirm against
;; the external implementation.
(decl u8_as_u64 (u8) u64)
|
||||
(extern constructor u8_as_u64 u8_as_u64)
|
||||
|
||||
;; Convert a `u16` into a `u64`. Bound to the external constructor of the same
;; name; NOTE(review): presumably a plain widening conversion -- confirm against
;; the external implementation.
(decl u16_as_u64 (u16) u64)
|
||||
(extern constructor u16_as_u64 u16_as_u64)
|
||||
|
||||
;; Convert a `u32` into a `u64`. Bound to the external constructor of the same
;; name; NOTE(review): presumably a plain widening conversion -- confirm against
;; the external implementation.
(decl u32_as_u64 (u32) u64)
|
||||
(extern constructor u32_as_u64 u32_as_u64)
|
||||
|
||||
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(extern const $B1 Type)
|
||||
(extern const $B8 Type)
|
||||
(extern const $B16 Type)
|
||||
(extern const $B32 Type)
|
||||
(extern const $B64 Type)
|
||||
(extern const $B128 Type)
|
||||
|
||||
(extern const $I8 Type)
|
||||
(extern const $I16 Type)
|
||||
(extern const $I32 Type)
|
||||
(extern const $I64 Type)
|
||||
(extern const $I128 Type)
|
||||
|
||||
(extern const $B8X16 Type)
|
||||
(extern const $B16X8 Type)
|
||||
(extern const $B32X4 Type)
|
||||
(extern const $B64X2 Type)
|
||||
|
||||
(extern const $I8X16 Type)
|
||||
(extern const $I16X8 Type)
|
||||
(extern const $I32X4 Type)
|
||||
(extern const $I64X2 Type)
|
||||
|
||||
(extern const $F32X4 Type)
|
||||
(extern const $F64X2 Type)
|
||||
|
||||
;; Get the bit width of a given type.
|
||||
(decl ty_bits (Type) u16)
|
||||
(extern constructor ty_bits ty_bits)
|
||||
|
||||
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; An extractor that only matches types that can fit in 64 bits.
|
||||
(decl fits_in_64 (Type) Type)
|
||||
(extern extractor fits_in_64 fits_in_64)
|
||||
|
||||
;; Extractor to get a `ValueSlice` out of a `ValueList`.
|
||||
(decl value_list_slice (ValueSlice) ValueList)
|
||||
(extern extractor infallible value_list_slice value_list_slice)
|
||||
|
||||
;; Extractor to get the first element from a value list, along with its tail as
|
||||
;; a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
|
||||
(extern extractor infallible unwrap_head_value_list_1 unwrap_head_value_list_1)
|
||||
|
||||
;; Extractor to get the first two elements from a value list, along with its
|
||||
;; tail as a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
|
||||
(extern extractor infallible unwrap_head_value_list_2 unwrap_head_value_list_2)
|
||||
|
||||
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
|
||||
(decl writable_reg_to_reg (WritableReg) Reg)
|
||||
(extern constructor writable_reg_to_reg writable_reg_to_reg)
|
||||
|
||||
;; Extract a `u64` from an `Imm64`.
|
||||
(decl u64_from_imm64 (u64) Imm64)
|
||||
(extern extractor infallible u64_from_imm64 u64_from_imm64)
|
||||
|
||||
;; Extract the result values for the given instruction.
|
||||
(decl inst_results (ValueSlice) Inst)
|
||||
(extern extractor infallible inst_results inst_results)
|
||||
|
||||
;; Extract the first result value of the given instruction.
|
||||
(decl first_result (Value) Inst)
|
||||
(extern extractor first_result first_result)
|
||||
|
||||
;; Extract the `InstructionData` for an `Inst`.
|
||||
(decl inst_data (InstructionData) Inst)
|
||||
(extern extractor infallible inst_data inst_data)
|
||||
|
||||
;; Extract the type of a `Value`.
|
||||
(decl value_type (Type) Value)
|
||||
(extern extractor infallible value_type value_type)
|
||||
|
||||
;; Extract the type of the instruction's first result.
|
||||
(decl result_type (Type) Inst)
|
||||
(extractor (result_type ty)
|
||||
(first_result (value_type ty)))
|
||||
|
||||
;; Extract the type of the instruction's first result and pass along the
|
||||
;; instruction as well.
|
||||
(decl has_type (Type Inst) Inst)
|
||||
(extractor (has_type ty inst)
|
||||
(and (result_type ty)
|
||||
inst))
|
||||
|
||||
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
|
||||
;; type. Will only match when there is more than one lane.
|
||||
(decl multi_lane (u8 u16) Type)
|
||||
(extern extractor multi_lane multi_lane)
|
||||
|
||||
;; Match the instruction that defines the given value, if any.
|
||||
(decl def_inst (Inst) Value)
|
||||
(extern extractor def_inst def_inst)
|
||||
Reference in New Issue
Block a user