wasmtime/cranelift/codegen/src/isa/riscv64/inst.isle

;; Instruction formats.
(type MInst
  (enum
    ;; A no-op of zero size.
    (Nop0)
    ;; A no-op; presumably occupies 4 bytes (one instruction) — confirm in the emitter.
    (Nop4)

    ;; Load upper immediate (`lui`): a destination register and a 20-bit immediate.
    (Lui
      (rd WritableReg)
      (imm Imm20))

    ;; Load a 32-bit constant into `rd`.
    (LoadConst32
      (rd WritableReg)
      (imm u32))

    ;; Load a 64-bit constant into `rd`.
    (LoadConst64
      (rd WritableReg)
      (imm u64))

    ;; `auipc`: a destination register and a 20-bit immediate.
     (Auipc
      (rd WritableReg)
      (imm Imm20))

    ;; An FPU operation with one register source and a register destination.
    ;; `frm` optionally carries a floating-point rounding mode.
    (FpuRR
      (alu_op FpuOPRR)
      (frm OptionFloatRoundingMode)
      (rd WritableReg)
      (rs Reg))


    ;; An ALU operation with two register sources and a register destination.
    (AluRRR
      (alu_op AluOPRRR)
      (rd WritableReg)
      (rs1 Reg)
      (rs2 Reg))

    ;; An FPU operation with two register sources and a register destination.
    (FpuRRR
      (alu_op FpuOPRRR)
      (frm OptionFloatRoundingMode)
      (rd WritableReg)
      (rs1 Reg)
      (rs2 Reg))

    ;; An FPU operation with three register sources and a register destination.
    (FpuRRRR
      (alu_op FpuOPRRRR)
      (frm OptionFloatRoundingMode)
      (rd WritableReg)
      (rs1 Reg)
      (rs2 Reg)
      (rs3 Reg))

    ;; An ALU operation with a register source and an immediate-12 source, and a register
    ;; destination.
    (AluRRImm12
      (alu_op AluOPRRI)
      (rd WritableReg)
      (rs Reg)
      (imm12 Imm12))

    ;; A load from the address `from` into `rd`.
    (Load
      (rd WritableReg)
      (op LoadOP)
      (flags MemFlags)
      (from AMode))
    ;; A store of `src` to the address `to`.
    (Store
      (to AMode)
      (op StoreOP)
      (flags MemFlags)
      (src Reg))

    ;; A pseudo-instruction that captures register arguments in vregs.
    (Args
      (args VecArgPair))

    ;; A return carrying a list of return-value pairs.
    (Ret (rets VecRetPair))

    ;; Extend `rn` from `from_bits` to `to_bits` into `rd`; `signed` selects
    ;; sign- versus zero-extension (presumably — confirm in the emitter).
     (Extend
      (rd WritableReg)
      (rn Reg)
      (signed bool)
      (from_bits u8)
      (to_bits u8))

    ;; Adjust the stack pointer by `amount` (variant name spelling is as declared).
    (AjustSp
      (amount i64))
    ;; A machine call instruction.
    (Call
      (info BoxCallInfo))

    ;; A machine indirect-call instruction.
    (CallInd
      (info BoxCallIndInfo))

    ;; Trap with `trap_code` depending on the value in `test`.
    (TrapIf
      (test Reg)
      (trap_code TrapCode))

    ;; Use a simple compare of `rs1` and `rs2` under `cc` to decide whether to trap.
    (TrapIfC
      (rs1 Reg)
      (rs2 Reg)
      (cc IntCC)
      (trap_code TrapCode))

    ;; Jump to `dest`.
    (Jal
      ;; No `rd` field: the link register is not used by this variant.
      (dest BranchTarget))

    ;; A conditional branch with explicit taken and not-taken targets.
    (CondBr
      (taken BranchTarget)
      (not_taken BranchTarget)
      (kind IntegerCompare))

    ;; Load an inline symbol reference.
    (LoadExtName
      (rd WritableReg)
      (name BoxExternalName)
      (offset i64))

    ;; Load address referenced by `mem` into `rd`.
    (LoadAddr
      (rd WritableReg)
      (mem AMode))

    ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This
    ;; controls how AMode::NominalSPOffset args are lowered.
    (VirtualSPOffsetAdj
      (amount i64))

    ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we
    ;; keep them separate at the `Inst` level for better pretty-printing
    ;; and faster `is_move()` logic.
    (Mov
      (rd WritableReg)
      (rm Reg)
      (ty Type))

    ;; A MOV instruction, but where the source register is a non-allocatable
    ;; PReg. It's important that the register be non-allocatable, as regalloc2
    ;; will not see it as used.
    (MovFromPReg
      (rd WritableReg)
      (rm PReg))

    ;; A memory fence with predecessor and successor access sets.
    (Fence
      (pred FenceReq)
      (succ FenceReq))

    ;; `fence.i`.
    (FenceI)

    ;; `ecall`.
    (ECall)

    ;; `ebreak`.
    (EBreak)

    ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at
    ;; runtime.
    (Udf
      (trap_code TrapCode))
    ;; A jump-and-link-register operation.
    (Jalr
      ;; Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0.
      (rd WritableReg)
      (base Reg)
      (offset Imm12))

    ;; Atomic operations.
    (Atomic
      (op AtomicOP)
      (rd WritableReg)
      (addr Reg)
      (src Reg)
      (amo AMO))
    ;; An atomic store.
    (AtomicStore
      (src Reg)
      (ty Type)
      (p Reg))
    ;; An atomic load.
    (AtomicLoad
      (rd WritableReg)
      (ty Type)
      (p Reg))

    ;; Atomic read-modify-write operations that need a loop to implement
    ;; (for example, an atomic nand).
    (AtomicRmwLoop
      (offset Reg)
      (op AtomicRmwOp)
      (dst WritableReg)
      (ty Type)
      (p Reg)
      (x Reg)
      (t0 WritableReg))

    ;; Select `x` or `y` based on `condition`.
    (Select
      (dst VecWritableReg)
      (ty Type)
      (condition Reg)
      (x ValueRegs)
      (y ValueRegs))

    ;; A branch table indexed by `index`, using two temporaries.
    (BrTable
      (index Reg)
      (tmp1 WritableReg)
      (tmp2 WritableReg)
      (targets VecBranchTarget))

    ;; Atomic compare-and-set operation.
    (AtomicCas
      (offset Reg)
      (t0 WritableReg)
      (dst WritableReg)
      (e Reg)
      (addr Reg)
      (v Reg)
      (ty Type))
    ;; Select `x` or `y` based on `op` (signed/unsigned min/max).
    (IntSelect
      (op IntSelectOP)
      (dst VecWritableReg)
      (x ValueRegs)
      (y ValueRegs)
      (ty Type))
    ;; RISC-V CSR operations.
    (Csr
      (csr_op CsrOP)
      (rd WritableReg)
      (rs OptionReg)
      (imm OptionUimm5)
      (csr CsrAddress))
    ;; An integer compare.
    (Icmp
      (cc IntCC)
      (rd WritableReg)
      (a ValueRegs)
      (b ValueRegs)
      (ty Type))
    ;; Select a register based on a condition.
    ;; Very useful because in the lowering stage we cannot have conditional branches.
    (SelectReg
      (rd WritableReg)
      (rs1 Reg)
      (rs2 Reg)
      (condition IntegerCompare))
    ;; Float-to-integer conversion pseudo-instruction. Field names suggest
    ;; `is_sat` selects a saturating conversion and `is_signed` the signedness
    ;; of the result — confirm in the emitter.
    (FcvtToInt
      (is_sat bool)
      (rd WritableReg)
      (tmp WritableReg) ;; a float register to load bounds.
      (rs Reg)
      (is_signed bool)
      (in_type Type)
      (out_type Type))

    ;; Raw bytes.
    (RawData (data VecU8))

    ;; An unwind pseudo-instruction.
       (Unwind
        (inst UnwindInst))

    ;; A dummy use, useful to keep a value alive.
       (DummyUse
        (reg Reg))
    ;; Float rounding (nearest / ceil / floor / trunc, see `FloatRoundOP`),
    ;; using one integer and one float temporary.
    (FloatRound
      (op FloatRoundOP)
      (rd WritableReg)
      (int_tmp WritableReg)
      (f_tmp WritableReg)
      (rs Reg)
      (ty Type))
    ;; Float max/min selection (see `FloatSelectOP`).
    (FloatSelect
      (op FloatSelectOP)
      (rd WritableReg)
      ;; an integer register
      (tmp WritableReg)
      (rs1 Reg)
      (rs2 Reg)
      (ty Type))
    ;; Same fields as `FloatSelect`; NOTE(review): how the two variants differ
    ;; is not visible here — see the emitter.
    (FloatSelectPseudo
      (op FloatSelectOP)
      (rd WritableReg)
      ;; an integer register
      (tmp WritableReg)
      (rs1 Reg)
      (rs2 Reg)
      (ty Type))

    ;; Population count, for targets that do not support the B extension;
    ;; implemented by iteration.
    (Popcnt
      (sum WritableReg)
      (step WritableReg)
      (tmp WritableReg)
      (rs Reg)
      (ty Type))

    ;; Count leading or trailing zeros.
    (Cltz
      ;; leading or trailing.
      (leading bool)
      (sum WritableReg)
      (step WritableReg)
      (tmp WritableReg)
      (rs Reg)
      (ty Type))
    ;; Byte-reverse register.
    (Rev8
      (rs Reg)
      (step WritableReg)
      (tmp WritableReg)
      (rd WritableReg))
    ;; Presumably a bit-reverse-within-bytes pseudo-instruction (named after
    ;; `brev8`) — confirm in the emitter.
    (Brev8
      (rs Reg)
      (ty Type)
      (step WritableReg)
      (tmp WritableReg)
      (tmp2 WritableReg)
      (rd WritableReg))
    ;; A stack-probe loop parameterized by guard size and probe count.
    (StackProbeLoop
      (guard_size u32)
      (probe_count u32)
      (tmp WritableReg))

    ;; A vector ALU operation with two register sources and a register
    ;; destination, tagged with the vector state it requires.
    (VecAluRRR
      (op VecAluOpRRR)
      (vd WritableReg)
      (vs1 Reg)
      (vs2 Reg)
      (vstate VState))

    ;; Set the vector state.
    (VecSetState
      (rd WritableReg)
      (vstate VState))

    ;; A vector load with element width `eew`.
    (VecLoad
      (eew VecElementWidth)
      (to WritableReg)
      (from VecAMode)
      (flags MemFlags)
      (vstate VState))

    ;; A vector store with element width `eew`.
    (VecStore
      (eew VecElementWidth)
      (to VecAMode)
      (from Reg)
      (flags MemFlags)
      (vstate VState))
))


;; Operation selector for `MInst.FloatSelect` / `MInst.FloatSelectPseudo`.
(type FloatSelectOP (enum
  (Max)
  (Min)
))

;; Rounding operation selector for `MInst.FloatRound`.
(type FloatRoundOP (enum
  (Nearest)
  (Ceil)
  (Floor)
  (Trunc)
))

;; CSR access operations used by `MInst.Csr`: three register forms followed
;; by their immediate (`...i`) counterparts.
(type CsrOP (enum
  (Csrrw)
  (Csrrs)
  (Csrrc)
  (Csrrwi)
  (Csrrsi)
  (Csrrci)
))

;; Operation selector for `MInst.IntSelect`: signed/unsigned max and min.
(type IntSelectOP (enum
  (Smax)
  (Umax)
  (Smin)
  (Umin)
))

;; Atomic operations used by `MInst.Atomic`.
(type AtomicOP (enum
  ;; `W`-suffixed (word) forms
  (LrW)
  (ScW)
  (AmoswapW)
  (AmoaddW)
  (AmoxorW)
  (AmoandW)
  (AmoorW)
  (AmominW)
  (AmomaxW)
  (AmominuW)
  (AmomaxuW)
  ;; `D`-suffixed (doubleword) forms
  (LrD)
  (ScD)
  (AmoswapD)
  (AmoaddD)
  (AmoxorD)
  (AmoandD)
  (AmoorD)
  (AmominD)
  (AmomaxD)
  (AmominuD)
  (AmomaxuD)
))

;; Three-source FPU operations (fused multiply-add family) used by `MInst.FpuRRRR`.
(type FpuOPRRRR (enum
  ;; float32
  (FmaddS)
  (FmsubS)
  (FnmsubS)
  (FnmaddS)
  ;; float64
  (FmaddD)
  (FmsubD)
  (FnmsubD)
  (FnmaddD)
))

;; Classification results; each variant is annotated with its index and meaning.
(type FClassResult (enum
  ;; 0: rs1 is −∞.
  (NegInfinite)
  ;; 1: rs1 is a negative normal number.
  (NegNormal)
  ;; 2: rs1 is a negative subnormal number.
  (NegSubNormal)
  ;; 3: rs1 is −0.
  (NegZero)
  ;; 4: rs1 is +0.
  (PosZero)
  ;; 5: rs1 is a positive subnormal number.
  (PosSubNormal)
  ;; 6: rs1 is a positive normal number.
  (PosNormal)
  ;; 7: rs1 is +∞.
  (PosInfinite)
  ;; 8: rs1 is a signaling NaN.
  (SNaN)
  ;; 9: rs1 is a quiet NaN.
  (QNaN)
))

;; One-source FPU operations used by `MInst.FpuRR`.
(type FpuOPRR (enum
  ;; RV32F Standard Extension
  (FsqrtS)
  (FcvtWS)
  (FcvtWuS)
  (FmvXW)
  (FclassS)
  (FcvtSw)
  (FcvtSwU)
  (FmvWX)


  ;; RV64F Standard Extension (in addition to RV32F)
  (FcvtLS)
  (FcvtLuS)
  (FcvtSL)
  (FcvtSLU)


  ;; RV64D Standard Extension (in addition to RV32D)
  (FcvtLD)
  (FcvtLuD)
  (FmvXD)
  (FcvtDL)
  (FcvtDLu)
  (FmvDX)

  ;; RV32D Standard Extension
  (FsqrtD)
  (FcvtSD)
  (FcvtDS)
  (FclassD)
  (FcvtWD)
  (FcvtWuD)
  (FcvtDW)
  (FcvtDWU)
  ;; bitmanip (placeholder; no variants are listed here)

))

;; Load opcodes used by `MInst.Load`: integer loads followed by float loads.
(type LoadOP (enum
  (Lb)
  (Lh)
  (Lw)
  (Lbu)
  (Lhu)
  (Lwu)
  (Ld)
  (Flw)
  (Fld)
))

;; Store opcodes used by `MInst.Store`: integer stores followed by float stores.
(type StoreOP (enum
  (Sb)
  (Sh)
  (Sw)
  (Sd)
  (Fsw)
  (Fsd)
))

;; Two-source integer ALU operations used by `MInst.AluRRR`, grouped by the
;; ISA extension that provides them.
(type AluOPRRR (enum
  ;; Base set
  ;; NOTE(review): `sgt`/`sgtu` are assembler pseudo-instructions in the
  ;; RISC-V manual rather than distinct encodings — confirm how the emitter
  ;; handles `Sgt`/`Sgtu`.
  (Add)
  (Sub)
  (Sll)
  (Slt)
  (SltU)
  (Sgt)
  (Sgtu)
  (Xor)
  (Srl)
  (Sra)
  (Or)
  (And)

  ;; RV64I Base Instruction Set (in addition to RV32I)
  (Addw)
  (Subw)
  (Sllw)
  (Srlw)
  (Sraw)


  ;; RV32M Standard Extension
  (Mul)
  (Mulh)
  (Mulhsu)
  (Mulhu)
  (Div)
  (DivU)
  (Rem)
  (RemU)

  ;; RV64M Standard Extension (in addition to RV32M)
  (Mulw)
  (Divw)
  (Divuw)
  (Remw)
  (Remuw)

  ;; Zba: Address Generation Instructions
  (Adduw)
  (Sh1add)
  (Sh1adduw)
  (Sh2add)
  (Sh2adduw)
  (Sh3add)
  (Sh3adduw)

  ;; Zbb: Bit Manipulation Instructions
  (Andn)
  (Orn)
  (Xnor)
  (Max)
  (Maxu)
  (Min)
  (Minu)
  (Rol)
  (Rolw)
  (Ror)
  (Rorw)

  ;; Zbs: Single-bit instructions
  (Bclr)
  (Bext)
  (Binv)
  (Bset)

  ;; Zbc: Carry-less multiplication
  (Clmul)
  (Clmulh)
  (Clmulr)

  ;; Zbkb: Bit-manipulation for Cryptography
  (Pack)
  (Packw)
  (Packh)
))


;; Two-source FPU operations used by `MInst.FpuRRR`.
(type FpuOPRRR (enum
  ;; RV32F Standard Extension
  (FaddS)
  (FsubS)
  (FmulS)
  (FdivS)

  (FsgnjS)
  (FsgnjnS)
  (FsgnjxS)
  (FminS)
  (FmaxS)
  (FeqS)
  (FltS)
  (FleS)

  ;; RV32D Standard Extension
  (FaddD)
  (FsubD)
  (FmulD)
  (FdivD)
  (FsgnjD)
  (FsgnjnD)
  (FsgnjxD)
  (FminD)
  (FmaxD)
  (FeqD)
  (FltD)
  (FleD)
))


;; Register/immediate ALU operations used by `MInst.AluRRImm12`. Some of
;; these (e.g. `Clz`, `Rev8`) take no real immediate operand; helpers in this
;; file emit them through `alu_rr_funct12` or with a zero immediate.
(type AluOPRRI (enum
  ;; Base ISA
  (Addi)
  (Slti)
  (SltiU)
  (Xori)
  (Ori)
  (Andi)
  (Slli)
  (Srli)
  (Srai)
  (Addiw)
  (Slliw)
  (SrliW)
  (Sraiw)

  ;; Zba: Address Generation Instructions
  (SlliUw)

  ;; Zbb: Bit Manipulation Instructions
  ;; NOTE(review): the ratified bit-manipulation spec places `brev8` in
  ;; Zbkb rather than Zbb — confirm the grouping of `Brev8`.
  (Clz)
  (Clzw)
  (Ctz)
  (Ctzw)
  (Cpop)
  (Cpopw)
  (Sextb)
  (Sexth)
  (Zexth)
  (Rori)
  (Roriw)
  (Rev8)
  (Brev8)
  (Orcb)

  ;; Zbs: Single-bit instructions
  (Bclri)
  (Bexti)
  (Binvi)
  (Bseti)
))


;; Floating-point rounding modes.
(type FRM (enum
  ;; Round to Nearest, ties to Even
  (RNE)
  ;; Round towards Zero
  (RTZ)
  ;; Round Down (towards −∞)
  (RDN)
  ;; Round Up (towards +∞)
  (RUP)
  ;; Round to Nearest, ties to Max Magnitude
  (RMM)
  ;; In an instruction's rm field, selects the dynamic rounding mode;
  ;; in the Rounding Mode register, invalid.
  (Fcsr)
))

;; Floating-point exception flags.
(type FFlagsException (enum
  ;; Invalid Operation
  (NV)
  ;; Divide by Zero
  (DZ)
  ;; Overflow
  (OF)
  ;; Underflow
  (UF)
  ;; Inexact
  (NX)
))

;;;; Fence access-set bits, in order: input, output, read, write.
;;;; Successor set:   SI SO SR SW
;;;; Predecessor set: PI PO PR PW
;;;; Only the lowest four bits are used.
(type FenceReq (primitive u8))

;; Fence mode: `None` or `Tso`.
(type FenceFm (enum
    (None)
    (Tso)
))

;; Types declared as primitives/externs here; their definitions live outside
;; this file.
(type VecBranchTarget (primitive VecBranchTarget))
(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type IntegerCompare (primitive IntegerCompare))
(type AMode (primitive AMode))
(type OptionReg (primitive OptionReg))
(type OptionImm12 (primitive OptionImm12))
(type OptionUimm5 (primitive OptionUimm5))
(type Imm12 (primitive Imm12))
(type UImm5 (primitive UImm5))
(type Imm20 (primitive Imm20))
(type Imm3 (primitive Imm3))
(type BranchTarget (primitive BranchTarget))
(type CsrAddress (primitive CsrAddress))
(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode))
(type VecU8 (primitive VecU8))
(type AMO (primitive AMO))
(type VecMachLabel extern (enum))

;; Converters

;; Implicit conversion from `u8` to `i32`, implemented by the external
;; constructor `u8_as_i32`.
(convert u8 i32 u8_as_i32)
(decl u8_as_i32 (u8) i32)
(extern constructor u8_as_i32 u8_as_i32)

;; Implicit conversion from `ValueRegs` to `Reg`: takes the register at
;; index 0 of the group.
(decl convert_valueregs_reg (ValueRegs) Reg)
(rule (convert_valueregs_reg regs) (value_regs_get regs 0))
(convert ValueRegs Reg convert_valueregs_reg)


;; ISA Extension helpers

;; Each `has_*` term below is a pure, externally implemented constructor
;; returning a `bool`; by name, each reports whether the corresponding ISA
;; extension is available on the target.

;; `V` extension.
(decl pure has_v () bool)
(extern constructor has_v has_v)

;; `Zbkb` extension.
(decl pure has_zbkb () bool)
(extern constructor has_zbkb has_zbkb)

;; `Zba` extension.
(decl pure has_zba () bool)
(extern constructor has_zba has_zba)

;; `Zbb` extension.
(decl pure has_zbb () bool)
(extern constructor has_zbb has_zbb)

;; `Zbc` extension.
(decl pure has_zbc () bool)
(extern constructor has_zbc has_zbc)

;; `Zbs` extension.
(decl pure has_zbs () bool)
(extern constructor has_zbs has_zbs)

;; Emit an `MInst.FloatRound` and return its result register.
;; Allocates the destination (of type `ty`), plus an `$I64` and an `$F64`
;; scratch register for the instruction's `int_tmp` and `f_tmp` fields.
(decl gen_float_round (FloatRoundOP Reg Type) Reg)
(rule (gen_float_round round_op src ty)
  (let ((dst WritableReg (temp_writable_reg ty))
        (int_scratch WritableReg (temp_writable_reg $I64))
        (fp_scratch WritableReg (temp_writable_reg $F64))
        (_ Unit (emit (MInst.FloatRound round_op dst int_scratch fp_scratch src ty))))
    (writable_reg_to_reg dst)))

;; Emit an `MInst.FloatSelectPseudo` and return its result register.
;; Allocates the destination (of type `ty`) and an `$I64` scratch register.
(decl gen_float_select_pseudo (FloatSelectOP Reg Reg Type) Reg)
(rule (gen_float_select_pseudo sel_op lhs rhs ty)
  (let ((dst WritableReg (temp_writable_reg ty))
        (scratch WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.FloatSelectPseudo sel_op dst scratch lhs rhs ty))))
    (writable_reg_to_reg dst)))

;; Emit an `MInst.FloatSelect` and return its result register.
;; Allocates the destination (of type `ty`) and an `$I64` scratch register.
(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg)
(rule (gen_float_select sel_op lhs rhs ty)
  (let ((dst WritableReg (temp_writable_reg ty))
        (scratch WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.FloatSelect sel_op dst scratch lhs rhs ty))))
    (writable_reg_to_reg dst)))


;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; RV32I Base Integer Instruction Set

;; `add`: register-register addition.
;; rd ← rs1 + rs2
(decl rv_add (Reg Reg) Reg)
(rule (rv_add lhs rhs) (alu_rrr (AluOPRRR.Add) lhs rhs))

;; `addi` ("Add Immediate").
;; rd ← rs1 + sext(imm)
(decl rv_addi (Reg Imm12) Reg)
(rule (rv_addi src addend) (alu_rr_imm12 (AluOPRRI.Addi) src addend))

;; `sub`: register-register subtraction.
;; rd ← rs1 - rs2
(decl rv_sub (Reg Reg) Reg)
(rule (rv_sub lhs rhs) (alu_rrr (AluOPRRR.Sub) lhs rhs))

;; `neg`: assembler mnemonic for `sub rd, zero, rs1`.
(decl rv_neg (Reg) Reg)
(rule (rv_neg src) (rv_sub (zero_reg) src))

;; `sll` ("Shift Left Logical").
;; rd ← rs1 << rs2
(decl rv_sll (Reg Reg) Reg)
(rule (rv_sll value amount) (alu_rrr (AluOPRRR.Sll) value amount))

;; `slli` ("Shift Left Logical Immediate").
;; rd ← rs1 << uext(imm)
(decl rv_slli (Reg Imm12) Reg)
(rule (rv_slli value shamt) (alu_rr_imm12 (AluOPRRI.Slli) value shamt))

;; `srl` ("Shift Right Logical").
;; rd ← rs1 >> rs2
(decl rv_srl (Reg Reg) Reg)
(rule (rv_srl value amount) (alu_rrr (AluOPRRR.Srl) value amount))

;; `srli` ("Shift Right Logical Immediate").
;; rd ← rs1 >> uext(imm)
(decl rv_srli (Reg Imm12) Reg)
(rule (rv_srli value shamt) (alu_rr_imm12 (AluOPRRI.Srli) value shamt))

;; `sra` ("Shift Right Arithmetic").
;; rd ← rs1 >> rs2
(decl rv_sra (Reg Reg) Reg)
(rule (rv_sra value amount) (alu_rrr (AluOPRRR.Sra) value amount))

;; `srai` ("Shift Right Arithmetic Immediate").
;; rd ← rs1 >> uext(imm)
(decl rv_srai (Reg Imm12) Reg)
(rule (rv_srai value shamt) (alu_rr_imm12 (AluOPRRI.Srai) value shamt))

;; `or`: bitwise OR of two registers.
;; rd ← rs1 ∨ rs2
(decl rv_or (Reg Reg) Reg)
(rule (rv_or lhs rhs) (alu_rrr (AluOPRRR.Or) lhs rhs))

;; `ori` ("Or Immediate"). The 12-bit immediate is sign-extended.
;; rd ← rs1 ∨ sext(imm)
(decl rv_ori (Reg Imm12) Reg)
(rule (rv_ori src mask) (alu_rr_imm12 (AluOPRRI.Ori) src mask))

;; `xor`: bitwise exclusive OR of two registers.
;; rd ← rs1 ⊕ rs2
(decl rv_xor (Reg Reg) Reg)
(rule (rv_xor lhs rhs) (alu_rrr (AluOPRRR.Xor) lhs rhs))

;; `xori` ("Exclusive Or Immediate"). The 12-bit immediate is sign-extended.
;; rd ← rs1 ⊕ sext(imm)
(decl rv_xori (Reg Imm12) Reg)
(rule (rv_xori src mask) (alu_rr_imm12 (AluOPRRI.Xori) src mask))

;; `not`: assembler mnemonic for `xori rd, rs1, -1`.
(decl rv_not (Reg) Reg)
(rule (rv_not src) (rv_xori src (imm12_const -1)))

;; `and`: bitwise AND of two registers.
;; rd ← rs1 ∧ rs2
(decl rv_and (Reg Reg) Reg)
(rule (rv_and lhs rhs) (alu_rrr (AluOPRRR.And) lhs rhs))

;; `andi` ("And Immediate"). The 12-bit immediate is sign-extended.
;; rd ← rs1 ∧ sext(imm)
(decl rv_andi (Reg Imm12) Reg)
(rule (rv_andi src mask) (alu_rr_imm12 (AluOPRRI.Andi) src mask))

;; `sltu` ("Set Less Than Unsigned").
;; rd ← rs1 < rs2
(decl rv_sltu (Reg Reg) Reg)
(rule (rv_sltu lhs rhs) (alu_rrr (AluOPRRR.SltU) lhs rhs))

;; `snez`: assembler mnemonic for `sltu rd, zero, rs`.
(decl rv_snez (Reg) Reg)
(rule (rv_snez src) (rv_sltu (zero_reg) src))

;; `sltiu` ("Set Less Than Immediate Unsigned"). The immediate is
;; sign-extended and the comparison is then unsigned.
;; rd ← rs1 < sext(imm)
(decl rv_sltiu (Reg Imm12) Reg)
(rule (rv_sltiu src bound) (alu_rr_imm12 (AluOPRRI.SltiU) src bound))

;; `seqz`: assembler mnemonic for `sltiu rd, rs, 1`.
(decl rv_seqz (Reg) Reg)
(rule (rv_seqz src) (rv_sltiu src (imm12_const 1)))


;; RV64I Base Integer Instruction Set
;; Unlike RV32I instructions these are only present in the 64bit ISA

;; `addw` ("Add Word"): 32-bit add, result sign-extended to 64 bits.
;; rd ← sext32(rs1 + rs2)
(decl rv_addw (Reg Reg) Reg)
(rule (rv_addw lhs rhs) (alu_rrr (AluOPRRR.Addw) lhs rhs))

;; `addiw` ("Add Word Immediate"): 32-bit add of an immediate, result
;; sign-extended to 64 bits.
;; rd ← sext32(rs1 + imm)
(decl rv_addiw (Reg Imm12) Reg)
(rule (rv_addiw src addend) (alu_rr_imm12 (AluOPRRI.Addiw) src addend))

;; `sext.w` ("Sign Extend Word"): assembler mnemonic for `addiw rd, rs, 0`.
(decl rv_sextw (Reg) Reg)
(rule (rv_sextw src) (rv_addiw src (imm12_const 0)))

;; `subw` ("Subtract Word"): 32-bit subtract, result sign-extended to 64 bits.
;; rd ← sext32(rs1 - rs2)
(decl rv_subw (Reg Reg) Reg)
(rule (rv_subw lhs rhs) (alu_rrr (AluOPRRR.Subw) lhs rhs))

;; `sllw` ("Shift Left Logical Word").
;; rd ← sext32(uext32(rs1) << rs2)
(decl rv_sllw (Reg Reg) Reg)
(rule (rv_sllw value amount) (alu_rrr (AluOPRRR.Sllw) value amount))

;; `slliw` ("Shift Left Logical Immediate Word").
;; rd ← sext32(uext32(rs1) << imm)
(decl rv_slliw (Reg Imm12) Reg)
(rule (rv_slliw value shamt) (alu_rr_imm12 (AluOPRRI.Slliw) value shamt))

;; `srlw` ("Shift Right Logical Word").
;; rd ← sext32(uext32(rs1) >> rs2)
(decl rv_srlw (Reg Reg) Reg)
(rule (rv_srlw value amount) (alu_rrr (AluOPRRR.Srlw) value amount))

;; `srliw` ("Shift Right Logical Immediate Word").
;; rd ← sext32(uext32(rs1) >> imm)
(decl rv_srliw (Reg Imm12) Reg)
(rule (rv_srliw value shamt) (alu_rr_imm12 (AluOPRRI.SrliW) value shamt))

;; `sraw` ("Shift Right Arithmetic Word").
;; rd ← sext32(rs1 >> rs2)
(decl rv_sraw (Reg Reg) Reg)
(rule (rv_sraw value amount) (alu_rrr (AluOPRRR.Sraw) value amount))

;; `sraiw` ("Shift Right Arithmetic Immediate Word").
;; rd ← sext32(rs1 >> imm)
(decl rv_sraiw (Reg Imm12) Reg)
(rule (rv_sraiw value shamt) (alu_rr_imm12 (AluOPRRI.Sraiw) value shamt))


;; RV32M Extension
;; TODO: Enable these instructions only when we have the M extension

;; `mul`.
;; rd ← rs1 × rs2
(decl rv_mul (Reg Reg) Reg)
(rule (rv_mul lhs rhs) (alu_rrr (AluOPRRR.Mul) lhs rhs))

;; `mulh` ("Multiply High Signed Signed").
;; rd ← (sext(rs1) × sext(rs2)) » xlen
(decl rv_mulh (Reg Reg) Reg)
(rule (rv_mulh lhs rhs) (alu_rrr (AluOPRRR.Mulh) lhs rhs))

;; `mulhu` ("Multiply High Unsigned Unsigned").
;; rd ← (uext(rs1) × uext(rs2)) » xlen
(decl rv_mulhu (Reg Reg) Reg)
(rule (rv_mulhu lhs rhs) (alu_rrr (AluOPRRR.Mulhu) lhs rhs))

;; `div`: signed division.
;; rd ← rs1 ÷ rs2
(decl rv_div (Reg Reg) Reg)
(rule (rv_div dividend divisor) (alu_rrr (AluOPRRR.Div) dividend divisor))

;; `divu` ("Divide Unsigned").
;; rd ← rs1 ÷ rs2
(decl rv_divu (Reg Reg) Reg)
(rule (rv_divu dividend divisor) (alu_rrr (AluOPRRR.DivU) dividend divisor))

;; `rem`: signed remainder.
;; rd ← rs1 mod rs2
(decl rv_rem (Reg Reg) Reg)
(rule (rv_rem dividend divisor) (alu_rrr (AluOPRRR.Rem) dividend divisor))

;; `remu` ("Remainder Unsigned").
;; rd ← rs1 mod rs2
(decl rv_remu (Reg Reg) Reg)
(rule (rv_remu dividend divisor) (alu_rrr (AluOPRRR.RemU) dividend divisor))


;; RV64M Extension
;; TODO: Enable these instructions only when we have the M extension

;; `mulw` ("Multiply Word"): multiplies the low 32 bits of the sources and
;; sign-extends the low 32 bits of the product.
;; rd ← sext32(rs1[31:0] × rs2[31:0])
(decl rv_mulw (Reg Reg) Reg)
(rule (rv_mulw lhs rhs) (alu_rrr (AluOPRRR.Mulw) lhs rhs))

;; `divw` ("Divide Word").
;; rd ← sext32(rs1) ÷ sext32(rs2)
(decl rv_divw (Reg Reg) Reg)
(rule (rv_divw dividend divisor) (alu_rrr (AluOPRRR.Divw) dividend divisor))

;; `divuw` ("Divide Unsigned Word").
;; rd ← uext32(rs1) ÷ uext32(rs2)
(decl rv_divuw (Reg Reg) Reg)
(rule (rv_divuw dividend divisor) (alu_rrr (AluOPRRR.Divuw) dividend divisor))

;; `remw` ("Remainder Word").
;; rd ← sext32(rs1) mod sext32(rs2)
(decl rv_remw (Reg Reg) Reg)
(rule (rv_remw dividend divisor) (alu_rrr (AluOPRRR.Remw) dividend divisor))

;; `remuw` ("Remainder Unsigned Word").
;; rd ← uext32(rs1) mod uext32(rs2)
(decl rv_remuw (Reg Reg) Reg)
(rule (rv_remuw dividend divisor) (alu_rrr (AluOPRRR.Remuw) dividend divisor))


;; F and D Extensions
;; TODO: Enable these instructions only when we have the F or D extensions

;; `fadd.s` / `fadd.d`, chosen by the float type.
(decl rv_fadd (Type Reg Reg) Reg)
(rule (rv_fadd $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FaddS) $F32 lhs rhs))
(rule (rv_fadd $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FaddD) $F64 lhs rhs))

;; `fsub.s` / `fsub.d`, chosen by the float type.
(decl rv_fsub (Type Reg Reg) Reg)
(rule (rv_fsub $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FsubS) $F32 lhs rhs))
(rule (rv_fsub $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FsubD) $F64 lhs rhs))

;; `fmul.s` / `fmul.d`, chosen by the float type.
(decl rv_fmul (Type Reg Reg) Reg)
(rule (rv_fmul $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FmulS) $F32 lhs rhs))
(rule (rv_fmul $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FmulD) $F64 lhs rhs))

;; `fdiv.s` / `fdiv.d`, chosen by the float type.
(decl rv_fdiv (Type Reg Reg) Reg)
(rule (rv_fdiv $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FdivS) $F32 lhs rhs))
(rule (rv_fdiv $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FdivD) $F64 lhs rhs))

;; `fsqrt.s` / `fsqrt.d`, chosen by the float type.
(decl rv_fsqrt (Type Reg) Reg)
(rule (rv_fsqrt $F32 src)
  (fpu_rr (FpuOPRR.FsqrtS) $F32 src))
(rule (rv_fsqrt $F64 src)
  (fpu_rr (FpuOPRR.FsqrtD) $F64 src))

;; `fmadd.s` / `fmadd.d`, chosen by the float type.
(decl rv_fmadd (Type Reg Reg Reg) Reg)
(rule (rv_fmadd $F32 a b c)
  (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 a b c))
(rule (rv_fmadd $F64 a b c)
  (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 a b c))

;; Helper for emitting the `fcvt.d.s` ("Float Convert Single to Double") instruction.
;; The mnemonic reads destination-format first: the source is single
;; precision and the result is double precision, so the destination
;; temporary is allocated as `$F64` (`fpu_rr` uses the type only to allocate
;; the destination register).
(decl rv_fcvtds (Reg) Reg)
(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F64 rs1))

;; Helper for emitting the `fcvt.s.d` ("Float Convert Double to Single") instruction.
;; The mnemonic reads destination-format first: the source is double
;; precision and the result is single precision, so the destination
;; temporary is allocated as `$F32` (`fpu_rr` uses the type only to allocate
;; the destination register).
(decl rv_fcvtsd (Reg) Reg)
(rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F32 rs1))

;; `fsgnj` ("Floating Point Sign Injection").
;; Produces `rs1` with its sign bit replaced by the sign bit of `rs2`;
;; this is the `copysign` operation.
(decl rv_fsgnj (Type Reg Reg) Reg)
(rule (rv_fsgnj $F32 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjS) $F32 magnitude sign))
(rule (rv_fsgnj $F64 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjD) $F64 magnitude sign))

;; `fsgnjn` ("Floating Point Sign Injection Negated").
;; Produces `rs1` with its sign bit replaced by the negated sign bit of `rs2`.
;; With `rs1 == rs2` this is the `neg` operation.
(decl rv_fsgnjn (Type Reg Reg) Reg)
(rule (rv_fsgnjn $F32 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 magnitude sign))
(rule (rv_fsgnjn $F64 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 magnitude sign))

;; `fneg` ("Floating Point Negate"): mnemonic for `fsgnjn rd, rs1, rs1`.
(decl rv_fneg (Type Reg) Reg)
(rule (rv_fneg ty src)
  (rv_fsgnjn ty src src))

;; `fsgnjx` ("Floating Point Sign Injection Exclusive").
;; Produces `rs1` with its sign bit replaced by the XOR of the sign bits of
;; `rs1` and `rs2`. With `rs1 == rs2` this is `fabs`.
(decl rv_fsgnjx (Type Reg Reg) Reg)
(rule (rv_fsgnjx $F32 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 magnitude sign))
(rule (rv_fsgnjx $F64 magnitude sign)
  (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 magnitude sign))

;; `fabs` ("Floating Point Absolute"): mnemonic for `fsgnjx rd, rs1, rs1`.
(decl rv_fabs (Type Reg) Reg)
(rule (rv_fabs ty src)
  (rv_fsgnjx ty src src))

;; `feq` ("Float Equal"). The destination temporary is allocated as `$I64`.
(decl rv_feq (Type Reg Reg) Reg)
(rule (rv_feq $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FeqS) $I64 lhs rhs))
(rule (rv_feq $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FeqD) $I64 lhs rhs))

;; `flt` ("Float Less Than"). The destination temporary is allocated as `$I64`.
(decl rv_flt (Type Reg Reg) Reg)
(rule (rv_flt $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FltS) $I64 lhs rhs))
(rule (rv_flt $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FltD) $I64 lhs rhs))

;; `fle` ("Float Less Than or Equal"). The destination temporary is
;; allocated as `$I64`.
(decl rv_fle (Type Reg Reg) Reg)
(rule (rv_fle $F32 lhs rhs)
  (fpu_rrr (FpuOPRRR.FleS) $I64 lhs rhs))
(rule (rv_fle $F64 lhs rhs)
  (fpu_rrr (FpuOPRRR.FleD) $I64 lhs rhs))

;; `fgt` ("Float Greater Than"): implemented as `flt` with the operands
;; swapped.
(decl rv_fgt (Type Reg Reg) Reg)
(rule (rv_fgt ty lhs rhs)
  (rv_flt ty rhs lhs))

;; `fge` ("Float Greater Than or Equal"): implemented as `fle` with the
;; operands swapped.
(decl rv_fge (Type Reg Reg) Reg)
(rule (rv_fge ty lhs rhs)
  (rv_fle ty rhs lhs))


;; `Zba` Extension Instructions

;; `add.uw` ("Add Unsigned Word"): adds `rs2` to the zero-extended low
;; 32 bits of `rs1`.
;; rd ← uext32(rs1) + rs2
(decl rv_adduw (Reg Reg) Reg)
(rule (rv_adduw word addend) (alu_rrr (AluOPRRR.Adduw) word addend))

;; `zext.w` ("Zero Extend Word"): mnemonic for `add.uw rd, rs1, zero`.
;; rd ← uext32(rs1)
(decl rv_zextw (Reg) Reg)
(rule (rv_zextw word) (rv_adduw word (zero_reg)))

;; `slli.uw` ("Shift Left Logical Immediate Unsigned Word").
;; rd ← uext32(rs1) << imm
(decl rv_slliuw (Reg Imm12) Reg)
(rule (rv_slliuw word shamt) (alu_rr_imm12 (AluOPRRI.SlliUw) word shamt))


;; `Zbb` Extension Instructions

;; `andn` ("And Negated").
;; rd ← rs1 ∧ ~(rs2)
(decl rv_andn (Reg Reg) Reg)
(rule (rv_andn lhs rhs) (alu_rrr (AluOPRRR.Andn) lhs rhs))

;; `orn` ("Or Negated").
;; rd ← rs1 ∨ ~(rs2)
(decl rv_orn (Reg Reg) Reg)
(rule (rv_orn lhs rhs) (alu_rrr (AluOPRRR.Orn) lhs rhs))

;; `clz` ("Count Leading Zero Bits").
(decl rv_clz (Reg) Reg)
(rule (rv_clz src) (alu_rr_funct12 (AluOPRRI.Clz) src))

;; `clzw` ("Count Leading Zero Bits in Word").
(decl rv_clzw (Reg) Reg)
(rule (rv_clzw src) (alu_rr_funct12 (AluOPRRI.Clzw) src))

;; `ctz` ("Count Trailing Zero Bits").
(decl rv_ctz (Reg) Reg)
(rule (rv_ctz src) (alu_rr_funct12 (AluOPRRI.Ctz) src))

;; `ctzw` ("Count Trailing Zero Bits in Word").
(decl rv_ctzw (Reg) Reg)
(rule (rv_ctzw src) (alu_rr_funct12 (AluOPRRI.Ctzw) src))

;; `cpop` ("Count Population").
(decl rv_cpop (Reg) Reg)
(rule (rv_cpop src) (alu_rr_funct12 (AluOPRRI.Cpop) src))

;; `max`.
(decl rv_max (Reg Reg) Reg)
(rule (rv_max lhs rhs) (alu_rrr (AluOPRRR.Max) lhs rhs))

;; `sext.b`; emitted through the imm12 form with a zero immediate.
(decl rv_sextb (Reg) Reg)
(rule (rv_sextb src) (alu_rr_imm12 (AluOPRRI.Sextb) src (imm12_zero)))

;; `sext.h`; emitted through the imm12 form with a zero immediate.
(decl rv_sexth (Reg) Reg)
(rule (rv_sexth src) (alu_rr_imm12 (AluOPRRI.Sexth) src (imm12_zero)))

;; `zext.h`; emitted through the imm12 form with a zero immediate.
(decl rv_zexth (Reg) Reg)
(rule (rv_zexth src) (alu_rr_imm12 (AluOPRRI.Zexth) src (imm12_zero)))

;; `rol` ("Rotate Left").
(decl rv_rol (Reg Reg) Reg)
(rule (rv_rol value amount) (alu_rrr (AluOPRRR.Rol) value amount))

;; `rolw` ("Rotate Left Word").
(decl rv_rolw (Reg Reg) Reg)
(rule (rv_rolw value amount) (alu_rrr (AluOPRRR.Rolw) value amount))

;; `ror` ("Rotate Right").
(decl rv_ror (Reg Reg) Reg)
(rule (rv_ror value amount) (alu_rrr (AluOPRRR.Ror) value amount))

;; `rorw` ("Rotate Right Word").
(decl rv_rorw (Reg Reg) Reg)
(rule (rv_rorw value amount) (alu_rrr (AluOPRRR.Rorw) value amount))

;; `rev8` ("Byte Reverse").
(decl rv_rev8 (Reg) Reg)
(rule (rv_rev8 src) (alu_rr_funct12 (AluOPRRI.Rev8) src))

;; `brev8` ("Bit Reverse Inside Bytes").
;; TODO: This instruction is mentioned in some older versions of the
;; spec, but has since disappeared; we should follow up on this.
;; It was probably renamed to `rev.b`, which seems to be the closest match.
(decl rv_brev8 (Reg) Reg)
(rule (rv_brev8 src) (alu_rr_funct12 (AluOPRRI.Brev8) src))

;; `bseti` ("Single-Bit Set Immediate").
(decl rv_bseti (Reg Imm12) Reg)
(rule (rv_bseti src bit_index) (alu_rr_imm12 (AluOPRRI.Bseti) src bit_index))


;; `Zbkb` Extension Instructions

;; `pack` ("Pack low halves of registers").
(decl rv_pack (Reg Reg) Reg)
(rule (rv_pack low high) (alu_rrr (AluOPRRR.Pack) low high))

;; `packw` ("Pack low 16-bits of registers").
(decl rv_packw (Reg Reg) Reg)
(rule (rv_packw low high) (alu_rrr (AluOPRRR.Packw) low high))


;; Load an immediate: takes a type and a `u64` and returns a register.
;; Implemented externally; presumably materializes the constant into a
;; fresh register — confirm in the Rust implementation.
(decl imm (Type u64) Reg)
(extern constructor imm imm)

;; Imm12 Rules

;; The `Imm12` constant zero; shorthand for `(imm12_const 0)`.
(decl pure imm12_zero () Imm12)
(rule
  (imm12_zero)
  (imm12_const 0))

;; Build an `Imm12` from an `i32` constant. Implemented externally.
;; NOTE(review): behavior for values outside the 12-bit range is not visible here.
(decl pure imm12_const (i32) Imm12)
(extern constructor imm12_const imm12_const)

;; Materialize a small constant into a register by emitting
;; `addi rd, zero, value`.
(decl load_imm12 (i32) Reg)
(rule (load_imm12 value)
  (alu_rr_imm12 (AluOPRRI.Addi) (zero_reg) (imm12_const value)))

;; The constructors below are implemented externally; only their signatures
;; are visible here, so the descriptions are inferred from names and types.

;; Build an `Imm12` from a `u64` bit pattern (for load immediate).
(decl imm_from_bits (u64) Imm12)
(extern constructor imm_from_bits imm_from_bits)

;; Build an `Imm12` from an `i64`; presumably from its negation — confirm.
(decl imm_from_neg_bits (i64) Imm12)
(extern constructor imm_from_neg_bits imm_from_neg_bits)

;; Build an `Imm12` from two `i32` constants; presumably their sum — confirm.
(decl imm12_const_add (i32 i32) Imm12)
(extern constructor imm12_const_add imm12_const_add)

;; Combine an `Imm12` with an `i32`; presumably a bitwise AND — confirm.
(decl imm12_and (Imm12 i32) Imm12)
(extern constructor imm12_and imm12_and)

;; Helper to get the negative of an `Imm12`.
(decl neg_imm12 (Imm12) Imm12)
(extern constructor neg_imm12 neg_imm12)

;; Imm12 Extractors

;; Extractor that matches a `Value` defined by an `iconst` whose constant
;; is accepted by `imm12_from_u64`, binding the resulting `Imm12`.
(decl imm12_from_value (Imm12) Value)
(extractor
  (imm12_from_value imm)
  (def_inst
    (iconst
      (u64_from_imm64 (imm12_from_u64 imm)))))

;; Extractor from a `u64` to an `Imm12`. Implemented externally.
;; NOTE(review): presumably fails to match when the value does not fit in
;; 12 bits — confirm against the external implementation.
(decl imm12_from_u64 (Imm12) u64)
(extern extractor imm12_from_u64 imm12_from_u64)


;; Float Helpers

;; Returns the default `OptionFloatRoundingMode` used by the FPU helpers
;; below when no explicit rounding mode is requested. Implemented externally.
(decl gen_default_frm () OptionFloatRoundingMode)
(extern constructor gen_default_frm gen_default_frm)

;; Emits an `MInst.FpuRR` with the default rounding mode. The result lives
;; in a fresh temporary of type `ty`, which is returned.
(decl fpu_rr (FpuOPRR Type Reg) Reg)
(rule (fpu_rr opcode out_ty input)
      (let ((rd WritableReg (temp_writable_reg out_ty))
            (_ Unit (emit (MInst.FpuRR opcode (gen_default_frm) rd input))))
        (writable_reg_to_reg rd)))

;; Emits an `MInst.AluRRR`. The destination is always a fresh `$I64`
;; temporary, which is returned.
(decl alu_rrr (AluOPRRR Reg Reg) Reg)
(rule (alu_rrr opcode lhs rhs)
      (let ((rd WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.AluRRR opcode rd lhs rhs))))
        (writable_reg_to_reg rd)))


;; Wraps an `FRM` value into an `OptionFloatRoundingMode`.
;; Implemented externally.
(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode)
(extern constructor pack_float_rounding_mode pack_float_rounding_mode)

;; Emits an `MInst.FpuRRR` with the default rounding mode. The result lives
;; in a fresh temporary of type `ty`, which is returned.
(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
(rule (fpu_rrr opcode out_ty lhs rhs)
      (let ((rd WritableReg (temp_writable_reg out_ty))
            (_ Unit (emit (MInst.FpuRRR opcode (gen_default_frm) rd lhs rhs))))
        (writable_reg_to_reg rd)))


;; Emits an `MInst.FpuRRRR` (three register sources) with the default
;; rounding mode. The result lives in a fresh temporary of type `ty`.
(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg)
(rule (fpu_rrrr opcode out_ty a b c)
      (let ((rd WritableReg (temp_writable_reg out_ty))
            (_ Unit (emit (MInst.FpuRRRR opcode (gen_default_frm) rd a b c))))
        (writable_reg_to_reg rd)))


;; Emits an `MInst.AluRRImm12` (register + 12-bit immediate). The
;; destination is a fresh `$I64` temporary, which is returned.
(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg)
(rule (alu_rr_imm12 opcode input immediate)
      (let ((rd WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.AluRRImm12 opcode rd input immediate))))
        (writable_reg_to_reg rd)))

;; Some instructions use the imm12 field as a `funct12` selector, so callers
;; do not supply an immediate. An `MInst.AluRRImm12` is emitted with a zero
;; `Imm12` placeholder in that slot.
(decl alu_rr_funct12 (AluOPRRI Reg) Reg)
(rule (alu_rr_funct12 opcode input)
      (let ((rd WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.AluRRImm12 opcode rd input (imm12_zero)))))
        (writable_reg_to_reg rd)))

;; Picks the add-immediate opcode for a type: `Addiw` for types of at most
;; 32 bits (higher priority), `Addi` for anything else that fits in 64 bits.
(decl select_addi (Type) AluOPRRI)
(rule 0 (select_addi (fits_in_64 ty))
      (AluOPRRI.Addi))
(rule 1 (select_addi (fits_in_32 ty))
      (AluOPRRI.Addiw))


;; Bitwise NOT. Types that fit in 64 bits are inverted with one `rv_not`;
;; `$I128` inverts the low and high registers independently.
(decl gen_bnot (Type ValueRegs) ValueRegs)
(rule 0 (gen_bnot (fits_in_64 _) val)
  (rv_not val))

(rule 1 (gen_bnot $I128 val)
  (let ((not_lo Reg (rv_not (value_regs_get val 0)))
        (not_hi Reg (rv_not (value_regs_get val 1))))
    (value_regs not_lo not_hi)))


;; Bitwise AND. Types that fit in 64 bits operate on register 0 only;
;; `$I128` ANDs the low pair and then the high pair.
(decl gen_and (Type ValueRegs ValueRegs) ValueRegs)
(rule 0 (gen_and (fits_in_64 _) lhs rhs)
  (rv_and (value_regs_get lhs 0) (value_regs_get rhs 0)))

(rule 1 (gen_and $I128 lhs rhs)
  (value_regs
    (rv_and (value_regs_get lhs 0) (value_regs_get rhs 0))
    (rv_and (value_regs_get lhs 1) (value_regs_get rhs 1))))


;; Bitwise OR. Types that fit in 64 bits operate on register 0 only;
;; `$I128` ORs the low pair and then the high pair.
(decl gen_or (Type ValueRegs ValueRegs) ValueRegs)
(rule 0 (gen_or (fits_in_64 _) lhs rhs)
  (rv_or (value_regs_get lhs 0) (value_regs_get rhs 0)))

(rule 1 (gen_or $I128 lhs rhs)
  (value_regs
    (rv_or (value_regs_get lhs 0) (value_regs_get rhs 0))
    (rv_or (value_regs_get lhs 1) (value_regs_get rhs 1))))


;; Reverses the bits of an integer of the given type.
;;
;; `gen_brev8` reverses the bits inside each byte and `gen_rev8` reverses
;; the byte order. For $I16/$I32 the reversed value ends up in the top of
;; the 64-bit register, so it is shifted back down by (64 - width).
(decl lower_bit_reverse (Reg Type) Reg)

;; A single byte only needs the in-byte reversal.
(rule (lower_bit_reverse src $I8)
  (gen_brev8 src $I8))

(rule (lower_bit_reverse src $I16)
  (let ((bits_swapped Reg (gen_brev8 src $I16))
        (bytes_swapped Reg (gen_rev8 bits_swapped)))
    (rv_srli bytes_swapped (imm12_const 48))))

(rule (lower_bit_reverse src $I32)
  (let ((bits_swapped Reg (gen_brev8 src $I32))
        (bytes_swapped Reg (gen_rev8 bits_swapped)))
    (rv_srli bytes_swapped (imm12_const 32))))

;; Full width: swap the bytes first, then the bits within each byte.
(rule (lower_bit_reverse src $I64)
  (gen_brev8 (gen_rev8 src) $I64))


;; Count trailing zeros.
(decl lower_ctz (Type Reg) Reg)

;; With Zbb, 64-bit and 32-bit values map directly to `ctz` / `ctzw`.
(rule 2 (lower_ctz $I64 src)
  (if-let $true (has_zbb))
  (rv_ctz src))

(rule 2 (lower_ctz $I32 src)
  (if-let $true (has_zbb))
  (rv_ctzw src))

;; With Zbb, for 8/16-bit values set the bit at index `ty_bits` first so the
;; count cannot exceed the type width, then use `ctzw`.
(rule 1 (lower_ctz (fits_in_16 ty) src)
  (if-let $true (has_zbb))
  (rv_ctzw (gen_bseti src (ty_bits ty))))

;; Fallback: the `Cltz` pseudo-instruction in trailing mode.
(rule 0 (lower_ctz ty src)
  (gen_cltz $false src ty))

;; Count trailing zeros of an i128 value.
;; Each 64-bit half is counted separately; the high half's count only
;; contributes when the low half is entirely zero.
(decl lower_ctz_128 (ValueRegs) ValueRegs)
(rule (lower_ctz_128 val)
  (let ((lo_word Reg (value_regs_get val 0))
        (hi_word Reg (value_regs_get val 1))
        ;; Per-half counts.
        (hi_count Reg (lower_ctz $I64 hi_word))
        (lo_count Reg (lower_ctz $I64 lo_word))
        ;; Keep the high count only when lo_word == 0, otherwise use 0.
        (hi_contrib Reg (gen_select_reg (IntCC.Equal) lo_word (zero_reg) hi_count (zero_reg)))
        (total Reg (rv_add lo_count hi_contrib)))
    (zext total $I64 $I128)))

;; Count leading zeros.
(decl lower_clz (Type Reg) Reg)

;; With Zbb, 64-bit and 32-bit values map directly to `clz` / `clzw`.
(rule 2 (lower_clz $I64 src)
  (if-let $true (has_zbb))
  (rv_clz src))

(rule 2 (lower_clz $I32 src)
  (if-let $true (has_zbb))
  (rv_clzw src))

;; With Zbb, 8/16-bit values are zero-extended and counted on the full
;; 64-bit register; the extra (64 - ty_bits) leading zeros are then removed
;; by adding (ty_bits - 64).
(rule 1 (lower_clz (fits_in_16 ty) src)
  (if-let $true (has_zbb))
  (let ((widened Reg (zext src ty $I64))
        (full_count Reg (rv_clz widened)))
    (rv_addi full_count (imm12_const_add (ty_bits ty) -64))))

;; Fallback: the `Cltz` pseudo-instruction in leading mode.
(rule 0 (lower_clz ty src)
  (gen_cltz $true src ty))


;; Count leading zeros of an i128 value.
;; Each 64-bit half is counted separately; the low half's count only
;; contributes when the high half is entirely zero.
(decl lower_clz_i128 (ValueRegs) ValueRegs)
(rule (lower_clz_i128 val)
  (let ((lo_word Reg (value_regs_get val 0))
        (hi_word Reg (value_regs_get val 1))
        ;; Per-half counts.
        (hi_count Reg (lower_clz $I64 hi_word))
        (lo_count Reg (lower_clz $I64 lo_word))
        ;; Keep the low count only when hi_word == 0, otherwise use 0.
        (lo_contrib Reg (gen_select_reg (IntCC.Equal) hi_word (zero_reg) lo_count (zero_reg)))
        (total Reg (rv_add hi_count lo_contrib)))
    (zext total $I64 $I128)))


;; Count leading sign bits: sign-extend the input, invert it when negative
;; so the sign run becomes a run of zeros, count leading zeros, and subtract
;; one because the sign bit itself is not counted.
(decl lower_cls (Type Reg) Reg)
(rule (lower_cls ty src)
  (let ((widened Reg (ext_int_if_need $true src ty))
        (magnitude Reg (gen_select_reg (IntCC.SignedLessThan) widened (zero_reg) (rv_not widened) widened))
        (zeros Reg (lower_clz ty magnitude)))
    (rv_addi zeros (imm12_const -1))))

;; Count leading sign bits of an i128 value.
;; When the sign bit (tested on the high word) is set, both words are
;; inverted and the leading zeros of the inverted value are counted;
;; otherwise the leading zeros of the original value are counted. One is
;; subtracted because the sign bit itself does not count.
(decl lower_cls_i128 (ValueRegs) ValueRegs)
(rule (lower_cls_i128 val)
  (let ((lo_word Reg (value_regs_get val 0))
        (hi_word Reg (value_regs_get val 1))
        (lo_adj Reg (gen_select_reg (IntCC.SignedLessThan) hi_word (zero_reg) (rv_not lo_word) lo_word))
        (hi_adj Reg (gen_select_reg (IntCC.SignedLessThan) hi_word (zero_reg) (rv_not hi_word) hi_word))
        (clz ValueRegs (lower_clz_i128 (value_regs lo_adj hi_adj)))
        (zeros Reg (value_regs_get clz 0))
        (minus_sign Reg (rv_addi zeros (imm12_const -1))))
    (zext minus_sign $I64 $I128)))


;; Emits the `MInst.Cltz` pseudo-instruction. The `bool` selects leading
;; ($true) or trailing ($false) mode, matching how `lower_clz` and
;; `lower_ctz` call it. Three fresh `$I64` temporaries are handed to the
;; instruction; the one in the `sum` position is returned.
(decl gen_cltz (bool Reg Type) Reg)
(rule (gen_cltz leading src ty)
  (let ((scratch WritableReg (temp_writable_reg $I64))
        (stride WritableReg (temp_writable_reg $I64))
        (total WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.Cltz leading total stride scratch src ty))))
    (writable_reg_to_reg total)))


;; Extends an integer to 64 bits when it is narrower than that.
;; The `bool` selects sign extension ($true) or zero extension ($false).
(decl ext_int_if_need (bool ValueRegs Type) ValueRegs)

;; $I128 and $I64 values already fill their registers: return them as-is.
(rule 2 (ext_int_if_need _ regs $I128) regs)
(rule 1 (ext_int_if_need _ regs $I64) regs)

;; Integer types of at most 32 bits are widened to $I64.
(rule 0 (ext_int_if_need $false regs (fits_in_32 (ty_int ty)))
  (zext regs ty $I64))
(rule 0 (ext_int_if_need $true regs (fits_in_32 (ty_int ty)))
  (sext regs ty $I64))


;; Zero-extends `val` from `from_ty` to `to_ty`; shorthand for `extend`
;; with `ExtendOp.Zero`.
(decl zext (ValueRegs Type Type) ValueRegs)
(rule (zext regs src_ty dst_ty)
      (extend regs (ExtendOp.Zero) src_ty dst_ty))

;; Sign-extends `val` from `from_ty` to `to_ty`; shorthand for `extend`
;; with `ExtendOp.Signed`.
(decl sext (ValueRegs Type Type) ValueRegs)
(rule (sext regs src_ty dst_ty)
      (extend regs (ExtendOp.Signed) src_ty dst_ty))

;; The kind of integer extension requested from `extend`.
(type ExtendOp
  (enum
    ;; Zero extension: the new upper bits are filled with zeros.
    (Zero)
    ;; Sign extension: the new upper bits replicate the sign bit.
    (Signed)))

;; Performs either a sign or zero extension of the given value.
;; Arguments: the value, the extension kind, the source type, and the
;; destination type. The rules follow below.
(decl extend (ValueRegs ExtendOp Type Type) ValueRegs)

;;; Generic Rules Extending to I64
;; Maps an extension kind to the right-shift opcode that implements it:
;; logical (`Srli`) for zero extension, arithmetic (`Srai`) for signed.
(decl pure extend_shift_op (ExtendOp) AluOPRRI)
(rule (extend_shift_op (ExtendOp.Signed))
      (AluOPRRI.Srai))
(rule (extend_shift_op (ExtendOp.Zero))
      (AluOPRRI.Srli))

;; Most generic case: shift left by (64 - source width) so the value's top
;; bit lands in bit 63, then shift right by the same amount. The right-shift
;; flavour (logical or arithmetic) comes from the extend op.
(rule 0 (extend regs kind (fits_in_32 src_ty) (fits_in_64 dst_ty))
  (let ((src Reg (value_regs_get regs 0))
        (amount Imm12 (imm_from_bits (u64_sub 64 (ty_bits src_ty))))
        (shifted_up Reg (rv_slli src amount))
        (down_op AluOPRRI (extend_shift_op kind)))
    (alu_rr_imm12 down_op shifted_up amount)))

;; Zero-extending an 8-bit value is a single `andi` with 255.
(rule 1 (extend regs (ExtendOp.Zero) $I8 (fits_in_64 dst_ty))
  (rv_andi (value_regs_get regs 0) (imm12_const 255)))

;; Sign-extending from 32 to 64 bits is `addiw val 0`, also known as
;; `sext.w`.
(rule 1 (extend regs (ExtendOp.Signed) $I32 $I64)
  (rv_sextw (value_regs_get regs 0)))


;; `packh` is deliberately not used to zero-extend 8-bit values: `andi`
;; does the same job and is part of the base ISA.

;; With `zbkb`, `packw` against the zero register zero-extends 16-bit values.
(rule 1 (extend regs (ExtendOp.Zero) $I16 (fits_in_64 _))
  (if-let $true (has_zbkb))
  (rv_packw (value_regs_get regs 0) (zero_reg)))

;; With `zbkb`, `pack` against the zero register zero-extends 32-bit values.
(rule 1 (extend regs (ExtendOp.Zero) $I32 $I64)
  (if-let $true (has_zbkb))
  (rv_pack (value_regs_get regs 0) (zero_reg)))


;; With `zbb`, the dedicated `sext.b` instruction sign-extends 8-bit values.
(rule 1 (extend regs (ExtendOp.Signed) $I8 (fits_in_64 _))
  (if-let $true (has_zbb))
  (rv_sextb (value_regs_get regs 0)))

;; With `zbb`, the dedicated `sext.h` instruction sign-extends 16-bit values.
(rule 1 (extend regs (ExtendOp.Signed) $I16 (fits_in_64 _))
  (if-let $true (has_zbb))
  (rv_sexth (value_regs_get regs 0)))

;; With `zbb`, the dedicated `zext.h` instruction zero-extends 16-bit values.
;; Priority 2 makes it win over the `zbkb` `packw` rule.
(rule 2 (extend regs (ExtendOp.Zero) $I16 (fits_in_64 _))
  (if-let $true (has_zbb))
  (rv_zexth (value_regs_get regs 0)))

;; With `zba`, `zext.w` zero-extends 32-bit values. Priority 2 makes it win
;; over the `zbkb` `pack` rule.
(rule 2 (extend regs (ExtendOp.Zero) $I32 $I64)
  (if-let $true (has_zba))
  (rv_zextw (value_regs_get regs 0)))

;;; Signed rules extending to I128
;; Sign-extend into the low register, then fill the high register with
;; copies of the low register's sign bit (arithmetic shift right by 63).
(rule 3 (extend regs (ExtendOp.Signed) (fits_in_64 src_ty) $I128)
  (let ((src Reg (value_regs_get regs 0))
        (lo Reg (extend src (ExtendOp.Signed) src_ty $I64))
        (hi Reg (rv_srai lo (imm12_const 63))))
    (value_regs lo hi)))

;;; Unsigned rules extending to I128
;; Zero-extend into the low register and use a zero constant for the high
;; register.
(rule 3 (extend regs (ExtendOp.Zero) (fits_in_64 src_ty) $I128)
  (let ((src Reg (value_regs_get regs 0))
        (lo Reg (extend src (ExtendOp.Zero) src_ty $I64))
        (hi Reg (load_u64_constant 0)))
    (value_regs lo hi)))

;; Catch-all: extending a type to itself returns the value unchanged.
(rule 4 (extend regs _ ty ty) regs)


;; Applies a three-register ALU opcode independently to the low registers
;; and to the high registers of two 128-bit values.
(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs)
(rule (lower_b128_binary opcode lhs rhs)
  (let ((lo Reg (alu_rrr opcode (value_regs_get lhs 0) (value_regs_get rhs 0)))
        (hi Reg (alu_rrr opcode (value_regs_get lhs 1) (value_regs_get rhs 1))))
    (value_regs lo hi)))

;; Unsigned multiply, high half.
(decl lower_umlhi (Type Reg Reg) Reg)

;; Other types: zero-extend both operands, multiply, then shift the product
;; right by the type width so the high half lands in the low bits.
(rule 0 (lower_umlhi ty lhs rhs)
  (let ((product Reg (rv_mul (ext_int_if_need $false lhs ty)
                             (ext_int_if_need $false rhs ty))))
    (rv_srli product (imm12_const (ty_bits ty)))))

;; 64-bit: `mulhu` yields the high half directly.
(rule 1 (lower_umlhi $I64 lhs rhs)
  (rv_mulhu lhs rhs))

;; Signed multiply, high half.
(decl lower_smlhi (Type Reg Reg) Reg)

;; 64-bit: `mulh` yields the high half directly.
(rule 1
  (lower_smlhi $I64 rs1 rs2)
  (rv_mulh rs1 rs2))

;; Other types: sign-extend both operands to 64 bits, multiply, then shift
;; the product right by the type width so the high half lands in the low
;; bits.
;;
;; The sign extension mirrors the zero extension in `lower_umlhi`: without
;; it, the bits above the type width in the source registers would leak
;; into the product and corrupt the high half. Sign extension is idempotent,
;; so callers that already pass sign-extended operands still get the same
;; result.
(rule
  (lower_smlhi ty rs1 rs2)
  (let
    ((tmp Reg (rv_mul (ext_int_if_need $true rs1 ty) (ext_int_if_need $true rs2 ty))))
    (rv_srli tmp (imm12_const (ty_bits ty)))))


;; Rotate left. $I64 and $I32 use the Zbb `rol` / `rolw` instructions when
;; available; everything else goes through the shift-based sequence.
(decl lower_rotl (Type Reg Reg) Reg)

;; 64-bit.
(rule 1 (lower_rotl $I64 src amt)
  (if-let $true (has_zbb))
  (rv_rol src amt))

(rule 0 (lower_rotl $I64 src amt)
  (if-let $false (has_zbb))
  (lower_rotl_shift $I64 src amt))

;; 32-bit.
(rule 1 (lower_rotl $I32 src amt)
  (if-let $true (has_zbb))
  (rv_rolw src amt))

(rule 0 (lower_rotl $I32 src amt)
  (if-let $false (has_zbb))
  (lower_rotl_shift $I32 src amt))

;; Lowest priority: any remaining type.
(rule -1 (lower_rotl ty src amt)
  (lower_rotl_shift ty src amt))

;;; Rotate left implemented with plain shifts (used for I8 and I16, and
;;; for I32/I64 when Zbb is unavailable).
(decl lower_rotl_shift (Type Reg Reg) Reg)

(rule (lower_rotl_shift ty src amt)
  (let ((amounts ValueRegs (gen_shamt ty amt))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Bits moved towards the top.
        (upper Reg (rv_sll src sh))
        ;; Bits that wrap around to the bottom.
        (wrapped_raw Reg (rv_srl src inv_sh))
        ;; When the shift amount is 0 the wrapped part is replaced by 0.
        (wrapped Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) wrapped_raw)))
    (rv_or upper wrapped)))


;;;; Constructs the shift amount. The i128 shift and rotate lowerings are
;;;; also built on shifts, so they call this helper as well.
;;;; Returns two registers: the shift amount, and (ty_bits - shift amount).
;;;; If ty_bits is greater than 64 (e.g. i128), the shift amount falls back
;;;; to 64 bits, because this is a 64-bit platform.
(decl gen_shamt (Type Reg) ValueRegs)
(extern constructor gen_shamt gen_shamt)

;; Rotate right. $I64 and $I32 use the Zbb `ror` / `rorw` instructions when
;; available; everything else goes through the shift-based sequence.
(decl lower_rotr (Type Reg Reg) Reg)

;; 64-bit.
(rule 1 (lower_rotr $I64 src amt)
  (if-let $true (has_zbb))
  (rv_ror src amt))

(rule 0 (lower_rotr $I64 src amt)
  (if-let $false (has_zbb))
  (lower_rotr_shift $I64 src amt))

;; 32-bit.
(rule 1 (lower_rotr $I32 src amt)
  (if-let $true (has_zbb))
  (rv_rorw src amt))

(rule 0 (lower_rotr $I32 src amt)
  (if-let $false (has_zbb))
  (lower_rotr_shift $I32 src amt))

;; Lowest priority: any remaining type.
(rule -1 (lower_rotr ty src amt)
  (lower_rotr_shift ty src amt))

;;; Rotate right implemented with plain shifts.
(decl lower_rotr_shift (Type Reg Reg) Reg)

(rule (lower_rotr_shift ty src amt)
  (let ((amounts ValueRegs (gen_shamt ty amt))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Bits moved towards the bottom.
        (lower Reg (rv_srl src sh))
        ;; Bits that wrap around to the top.
        (wrapped_raw Reg (rv_sll src inv_sh))
        ;; When the shift amount is 0 the wrapped part is replaced by 0.
        (wrapped Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) wrapped_raw)))
    (rv_or lower wrapped)))


;; bseti: Set a single bit in a register, indexed by a constant.
;;
;; Three strategies, selected by the available extensions and the bit index:
;;   * Zbs present: emit `bseti` with the bit index as immediate.
;;   * No Zbs, bit <= 10: emit `ori` with the mask `1 << bit` as immediate.
;;   * No Zbs, bit > 10: load the mask into a register and emit `or`.
;;
;; The `ori` immediate is a sign-extended 12-bit field, so the largest
;; single-bit mask it can encode is `1 << 10` (1024). `1 << 11` (2048) and
;; `1 << 12` (4096) are not representable as a positive 12-bit signed
;; immediate, so those indices must take the constant-load path.
(decl gen_bseti (Reg u64) Reg)
(rule (gen_bseti val bit)
  (if-let $false (has_zbs))
  (if-let $false (u64_le bit 10))
  (let ((const Reg (load_u64_constant (u64_shl 1 bit))))
    (rv_or val const)))

(rule (gen_bseti val bit)
  (if-let $false (has_zbs))
  (if-let $true (u64_le bit 10))
  (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit)))))

(rule (gen_bseti val bit)
  (if-let $true (has_zbs))
  (rv_bseti val (imm12_const (u64_as_i32 bit))))


;; Emits the `MInst.Popcnt` pseudo-instruction. Three fresh `$I64`
;; temporaries are handed to the instruction; the one in the `sum` position
;; is returned.
(decl gen_popcnt (Reg Type) Reg)
(rule (gen_popcnt src ty)
  (let ((scratch WritableReg (temp_writable_reg $I64))
        (stride WritableReg (temp_writable_reg $I64))
        (total WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.Popcnt total stride scratch src ty))))
    (writable_reg_to_reg total)))

;; Population count. With Zbb the value is zero-extended (if narrower than
;; 64 bits) and counted with `cpop`; otherwise the `Popcnt`
;; pseudo-instruction is used.
(decl lower_popcnt (Reg Type) Reg)
(rule 0 (lower_popcnt src ty)
  (if-let $false (has_zbb))
  (gen_popcnt src ty))
(rule 1 (lower_popcnt src ty)
  (if-let $true (has_zbb))
  (rv_cpop (ext_int_if_need $false src ty)))

;; Population count of an i128: count each 64-bit half and add the counts.
;; The high register of the result is a zero constant.
(decl lower_popcnt_i128 (ValueRegs) ValueRegs)
(rule (lower_popcnt_i128 val)
  (let ((lo_count Reg (lower_popcnt (value_regs_get val 0) $I64))
        (hi_count Reg (lower_popcnt (value_regs_get val 1) $I64))
        ;; Add the two halves together.
        (total Reg (rv_add lo_count hi_count)))
    (value_regs total (load_u64_constant 0))))

;; Rotate left for i128.
;; A rotation by the shift amount from `gen_shamt` is first built out of
;; 64-bit shifts; if the amount modulo 128 is at least 64, the two result
;; words are then swapped.
(decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs)
(rule (lower_i128_rotl val amt)
  (let ((amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Low word: low bits shifted up, plus the bits carried in from the
        ;; top of the high word.
        (lo_shifted Reg (rv_sll (value_regs_get val 0) sh))
        (lo_carry_raw Reg (rv_srl (value_regs_get val 1) inv_sh))
        ;; If sh == 0 the carry shift would overflow; use zero instead.
        (lo_carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) lo_carry_raw))
        (lo Reg (rv_or lo_shifted lo_carry))
        ;; High word: the mirror image of the above.
        (hi_shifted Reg (rv_sll (value_regs_get val 1) sh))
        (hi_carry_raw Reg (rv_srl (value_regs_get val 0) inv_sh))
        (hi_carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) hi_carry_raw))
        (hi Reg (rv_or hi_shifted hi_carry))
        ;; Operands of the "amount >= 64" test.
        (sixty_four Reg (load_u64_constant 64))
        (amt_mod_128 Reg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
    ;; So far only a rotation by less than 64 bits has been performed.
    ;; If the amount is greater than or equal to 64, swap low and high.
    (value_regs
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four hi lo)
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four lo hi))))


;; Rotate right for i128.
;; A rotation by the shift amount from `gen_shamt` is first built out of
;; 64-bit shifts; if the amount modulo 128 is at least 64, the two result
;; words are then swapped.
(decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs)
(rule (lower_i128_rotr val amt)
  (let ((amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Low word: low bits shifted down, plus the bits carried in from
        ;; the bottom of the high word.
        (lo_shifted Reg (rv_srl (value_regs_get val 0) sh))
        (lo_carry_raw Reg (rv_sll (value_regs_get val 1) inv_sh))
        ;; If sh == 0 the carry shift would overflow; use zero instead.
        (lo_carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) lo_carry_raw))
        (lo Reg (rv_or lo_shifted lo_carry))
        ;; High word: the mirror image of the above.
        (hi_shifted Reg (rv_srl (value_regs_get val 1) sh))
        (hi_carry_raw Reg (rv_sll (value_regs_get val 0) inv_sh))
        (hi_carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) hi_carry_raw))
        (hi Reg (rv_or hi_shifted hi_carry))
        ;; Operands of the "amount >= 64" test.
        (sixty_four Reg (load_u64_constant 64))
        (amt_mod_128 Reg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
    ;; So far only a rotation by less than 64 bits has been performed.
    ;; If the amount is greater than or equal to 64, swap low and high.
    (value_regs
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four hi lo)
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four lo hi))))


;; Shift left for i128.
;; A shift by the amount from `gen_shamt` is built from 64-bit shifts. If
;; the amount modulo 128 is at least 64, the low result word is zero and
;; the shifted low word becomes the high result word.
(decl lower_i128_ishl (ValueRegs ValueRegs) ValueRegs)
(rule (lower_i128_ishl val amt)
  (let ((amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Low word.
        (lo Reg (rv_sll (value_regs_get val 0) sh))
        ;; High word: bits carried out of the low word (zero when sh == 0)...
        (carry_raw Reg (rv_srl (value_regs_get val 0) inv_sh))
        (carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) carry_raw))
        ;; ...combined with the high word shifted up.
        (hi_shifted Reg (rv_sll (value_regs_get val 1) sh))
        (hi Reg (rv_or carry hi_shifted))
        ;; Operands of the "amount >= 64" test.
        (sixty_four Reg (load_u64_constant 64))
        (amt_mod_128 Reg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
    (value_regs
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four (zero_reg) lo)
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four lo hi))))

;; Logical shift right for i128.
;; A shift by the amount from `gen_shamt` is built from 64-bit shifts. If
;; the amount modulo 128 is at least 64, the shifted high word becomes the
;; low result word and the high result word is zero.
(decl lower_i128_ushr (ValueRegs ValueRegs) ValueRegs)
(rule (lower_i128_ushr val amt)
  (let ((amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
        (sh Reg (value_regs_get amounts 0))
        (inv_sh Reg (value_regs_get amounts 1))
        ;; Low word: bits carried in from the high word (zero when sh == 0)...
        (carry_raw Reg (rv_sll (value_regs_get val 1) inv_sh))
        (carry Reg (gen_select_reg (IntCC.Equal) sh (zero_reg) (zero_reg) carry_raw))
        ;; ...combined with the low word shifted down.
        (lo_shifted Reg (rv_srl (value_regs_get val 0) sh))
        (lo Reg (rv_or carry lo_shifted))
        ;; Operands of the "amount >= 64" test.
        (sixty_four Reg (load_u64_constant 64))
        ;; High word.
        (hi Reg (rv_srl (value_regs_get val 1) sh))
        (amt_mod_128 Reg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
    (value_regs
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four hi lo)
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) amt_mod_128 sixty_four (zero_reg) hi))))


;; Arithmetic shift right for i128.
;; A shift by the amount from `gen_shamt` is built from 64-bit shifts. If
;; the amount modulo 128 is at least 64, the arithmetically shifted high
;; word becomes the low result word and the high result word is a sign fill
;; (all ones when the original high word is negative, zero otherwise).
(decl lower_i128_sshr (ValueRegs ValueRegs) ValueRegs)
(rule
  (lower_i128_sshr x y)
  (let
    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
      (shamt Reg (value_regs_get tmp 0))
      (len_sub_shamt Reg (value_regs_get tmp 1))

      ;; Low part: bits carried in from the high word (zero when shamt == 0,
      ;; since the complementary shift would otherwise be out of range)...
      (low_part1 Reg (rv_sll (value_regs_get x 1) len_sub_shamt))
      (low_part2 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1))
      ;; ...combined with the low word shifted down.
      (low_part3 Reg (rv_srl (value_regs_get x 0) shamt))
      (low Reg (rv_or low_part2 low_part3 ))
      ;; Constant 64, the right-hand operand of the "amount >= 64" test below.
      ;; It is bound exactly once; an earlier revision bound it a second time
      ;; further down, which only duplicated the constant load.
      (const64 Reg (load_u64_constant 64))
      ;; High part: arithmetic shift keeps the sign.
      (high Reg (rv_sra (value_regs_get x 1) shamt))
      ;; All-ones constant used as the negative sign fill.
      (const_neg_1 Reg (load_imm12 -1))
      ;; Sign fill for the high word when shifting by 64 or more.
      (high_replacement Reg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg)))
      (shamt_128 Reg (rv_andi (value_regs_get y 0) (imm12_const 127))))
    (value_regs
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low)
      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high))))


;; Builds an addressing mode (`AMode`) from a base register, a 32-bit
;; offset and a type. Implemented externally.
(decl gen_amode (Reg Offset32 Type) AMode)
(extern constructor gen_amode gen_amode)

;; Builds an `Offset32` from an `i32` constant. Implemented externally.
(decl offset32_imm (i32) Offset32)
(extern constructor offset32_imm offset32_imm)

;; Loads from memory: emits an `MInst.Load` with the given load opcode and
;; memory flags, addressing `base + offset`. The result lives in a fresh
;; temporary of type `ty`, which is returned.
(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
(rule (gen_load base off opcode mem_flags ty)
  (let ((rd WritableReg (temp_writable_reg ty))
        (_ Unit (emit (MInst.Load rd opcode mem_flags (gen_amode base off $I64)))))
    (writable_reg_to_reg rd)))

;; Loads a 128-bit value as two 64-bit `ld` loads: the low register from
;; `offset` and the high register from `offset + 8`.
(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
(rule (gen_load_128 base off mem_flags)
  (let ((lo Reg (gen_load base off (LoadOP.Ld) mem_flags $I64))
        (hi Reg (gen_load base (offset32_add off 8) (LoadOP.Ld) mem_flags $I64)))
    (value_regs lo hi)))

;; Returns a default `MemFlags` value. Implemented externally.
(decl default_memflags () MemFlags)
(extern constructor default_memflags default_memflags)

;; Adds an `i64` displacement to an `Offset32` and returns the new offset.
;; Implemented externally.
;; NOTE(review): overflow behaviour is decided by the external
;; implementation — confirm.
(decl offset32_add (Offset32 i64) Offset32)
(extern constructor offset32_add offset32_add)

;; Stores to memory: produces an `MInst.Store` side effect that writes
;; `src` to `base + offset` with the given store opcode and memory flags.
(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput)
(rule (gen_store addr off opcode mem_flags value)
  (side_effect
    (SideEffectNoResult.Inst
      (MInst.Store (gen_amode addr off $I64) opcode mem_flags value))))

;; Stores a 128-bit value as two 64-bit `sd` stores: the low register to
;; `offset` and the high register to `offset + 8`.
(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput)
(rule (gen_store_128 base off mem_flags value)
  (side_effect
    (SideEffectNoResult.Inst2
      (MInst.Store (gen_amode base off $I64)
                   (StoreOP.Sd) mem_flags (value_regs_get value 0))
      (MInst.Store (gen_amode base (offset32_add off 8) $I64)
                   (StoreOP.Sd) mem_flags (value_regs_get value 1)))))

;; Extractor over `Type` that yields a `Type`. Implemented externally.
;; NOTE(review): presumably it matches only types for which atomic
;; operations are supported — confirm against the external implementation.
(decl valid_atomic_transaction (Type) Type)
(extern extractor valid_atomic_transaction valid_atomic_transaction)

;; Helper that constructs an atomic instruction: emits an `MInst.Atomic`
;; with the given opcode, address, source and `AMO` value, and returns the
;; fresh `$I64` temporary used as its destination.
(decl gen_atomic (AtomicOP Reg Reg AMO) Reg)
(rule (gen_atomic opcode address value ordering)
  (let ((rd WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.Atomic opcode rd address value ordering))))
    (writable_reg_to_reg rd)))

;; Maps a (type, read-modify-write operation) pair to the corresponding
;; AMO opcode. $I32 selects the `W` (word) form and $I64 the `D`
;; (doubleword) form.
(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP)

;; Add
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) (AtomicOP.AmoaddW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) (AtomicOP.AmoaddD))

;; And
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) (AtomicOP.AmoandW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) (AtomicOP.AmoandD))

;; Or
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) (AtomicOP.AmoorW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) (AtomicOP.AmoorD))

;; Signed max
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) (AtomicOP.AmomaxW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) (AtomicOP.AmomaxD))

;; Signed min
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) (AtomicOP.AmominW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) (AtomicOP.AmominD))

;; Unsigned max
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) (AtomicOP.AmomaxuW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) (AtomicOP.AmomaxuD))

;; Unsigned min
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) (AtomicOP.AmominuW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) (AtomicOP.AmominuD))

;; Exchange
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) (AtomicOP.AmoswapW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) (AtomicOP.AmoswapD))

;; Xor
(rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) (AtomicOP.AmoxorW))
(rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) (AtomicOP.AmoxorD))

;; Returns the `AMO` value passed to atomic instructions.
;; Implemented externally.
;; NOTE(review): presumably this encodes the acquire/release ordering bits
;; — confirm against the external implementation.
(decl atomic_amo () AMO)
(extern constructor atomic_amo atomic_amo)


;; Emits an `MInst.AtomicLoad` of type `ty` from the address in `p` and
;; returns the fresh `$I64` temporary that receives the value.
(decl gen_atomic_load (Reg Type) Reg)
(rule (gen_atomic_load address ty)
  (let ((rd WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.AtomicLoad rd ty address))))
    (writable_reg_to_reg rd)))

;;; Produces an `MInst.AtomicStore` side effect that writes `src`, as a
;;; value of type `ty`, to the address in `p`.
(decl gen_atomic_store (Reg Type Reg) InstOutput)
(rule (gen_atomic_store address ty value)
  (side_effect
    (SideEffectNoResult.Inst
      (MInst.AtomicStore value ty address))))


;; Moves a value from a float register to an integer (x) register, as used
;; by `lower_float_binary` and `lower_float_bnot`. Implemented externally.
(decl move_f_to_x (Reg Type) Reg)
(extern constructor move_f_to_x move_f_to_x)

;; Moves a value from an integer (x) register back to a float register, as
;; used by `lower_float_binary` and `lower_float_bnot`. Implemented
;; externally.
(decl move_x_to_f (Reg Type) Reg)
(extern constructor move_x_to_f move_x_to_f)

;; Yields a register for a stack slot plus a 32-bit offset.
;; Implemented externally.
;; NOTE(review): presumably the register holds the computed address —
;; confirm against the external implementation.
(decl gen_stack_addr (StackSlot Offset32) Reg )
(extern constructor gen_stack_addr gen_stack_addr)


;;; Generates a move and reinterprets the data.
;; Parameters are "rs", "in_type", "out_type". Implemented externally.
(decl gen_moves (ValueRegs Type Type) ValueRegs)
(extern constructor gen_moves gen_moves)


;; Emits an `MInst.Select` on condition register `c` between `x` and `y`,
;; writing into freshly allocated destination registers for `ty`.
(decl gen_select (Type Reg ValueRegs ValueRegs) ValueRegs)
(rule (gen_select ty cond if_set if_clear)
  (let ((dst VecWritableReg (alloc_vec_writable ty))
        ;; `dst` is moved into the instruction, so keep a clone to read the
        ;; result registers from afterwards.
        (result VecWritableReg (vec_writable_clone dst))
        (_ Unit (emit (MInst.Select dst ty cond if_set if_clear))))
    (vec_writable_to_regs result)))

;; Register select driven by an integer comparison.
;; Parameters are "intcc compare_a compare_b rs1 rs2". Implemented externally.
;; NOTE(review): call sites in this file read as "rs1 if `compare_a intcc
;; compare_b` holds, otherwise rs2" — confirm against the implementation.
(decl gen_select_reg (IntCC Reg Reg Reg Reg) Reg)
(extern constructor gen_select_reg gen_select_reg)

;; Loads a `u64` constant into a register. Implemented externally.
(decl load_u64_constant (u64) Reg)
(extern constructor load_u64_constant load_u64_constant)

;;; Clones a `VecWritableReg`.
;;; Without the clone, the Rust compiler complains about a use of a moved
;;; value when the same vector is needed after being passed to `emit`.
(decl vec_writable_clone (VecWritableReg) VecWritableReg)
(extern constructor vec_writable_clone vec_writable_clone)

;; Converts a `VecWritableReg` into readable `ValueRegs`.
;; Implemented externally.
(decl vec_writable_to_regs (VecWritableReg) ValueRegs)
(extern constructor vec_writable_to_regs vec_writable_to_regs)

;; Allocates the writable destination registers for a value of the given
;; type. Implemented externally.
(decl alloc_vec_writable (Type) VecWritableReg)
(extern constructor alloc_vec_writable alloc_vec_writable)

;; Bitwise select: computes `(c & x) | (~c & y)`, i.e. each result bit
;; comes from `x` where the mask bit is set and from `y` where it is clear.
(decl gen_bitselect (Type Reg Reg Reg) Reg)
(rule (gen_bitselect ty mask if_set if_clear)
  (let (;; Bits taken from the first operand.
        (from_set Reg (rv_and mask if_set))
        ;; Inverted mask.
        (inv_mask Reg (rv_not mask))
        ;; Bits taken from the second operand.
        (from_clear Reg (rv_and inv_mask if_clear)))
    ;; Merge both parts.
    (rv_or from_set from_clear)))

;; Emits an `MInst.IntSelect` with the given select operation over `x` and
;; `y`, writing into freshly allocated destination registers for `ty`.
(decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs)
(rule (gen_int_select ty select_op lhs rhs)
  (let ((out VecWritableReg (alloc_vec_writable ty))
        ;; The instruction receives a clone so `out` can still be read below.
        (_ Unit (emit (MInst.IntSelect select_op (vec_writable_clone out) lhs rhs ty))))
    (vec_writable_to_regs out)))

;; Produces an `MInst.Udf` side effect carrying the given trap code.
(decl udf (TrapCode) InstOutput)
(rule (udf trap_code)
  (side_effect
    (SideEffectNoResult.Inst (MInst.Udf trap_code))))

;; Selects the `LoadOP` for a given type. Implemented externally.
(decl load_op (Type) LoadOP)
(extern constructor load_op load_op)

;; Selects the `StoreOP` for a given type. Implemented externally.
(decl store_op (Type) StoreOP)
(extern constructor store_op store_op)

;; Selects the integer load opcode for a signedness flag and a bit width.
;; The bool is "is_signed": unsigned loads use the `u` variants. At 64 bits
;; the flag is irrelevant and `Ld` is always used.
(decl int_load_op (bool u8) LoadOP)

;; 8-bit
(rule (int_load_op $false 8) (LoadOP.Lbu))
(rule (int_load_op $true 8) (LoadOP.Lb))

;; 16-bit
(rule (int_load_op $false 16) (LoadOP.Lhu))
(rule (int_load_op $true 16) (LoadOP.Lh))

;; 32-bit
(rule (int_load_op $false 32) (LoadOP.Lwu))
(rule (int_load_op $true 32) (LoadOP.Lw))

;; 64-bit
(rule (int_load_op _ 64) (LoadOP.Ld))

;;;; Loads an external name, with an `i64` argument, into a register.
;;;; Implemented externally.
;;;; NOTE(review): the `i64` is presumably an offset added to the symbol's
;;;; address — confirm against the external implementation.
(decl load_ext_name (ExternalName i64) Reg)
(extern constructor load_ext_name load_ext_name)

;; Selects the `FpuOPRR` opcode for an integer-to-float conversion from two
;; types and a bool. Implemented externally.
;; NOTE(review): presumably (from type, is_signed, to type) — confirm the
;; argument meaning against the external implementation.
(decl int_convert_2_float_op (Type bool Type) FpuOPRR)
(extern constructor int_convert_2_float_op int_convert_2_float_op)

;;;; Emits the `MInst.FcvtToInt` pseudo-instruction. Arguments, in order:
;;;; is_sat, source register, is_signed, input type, output type. The result
;;;; lives in a fresh temporary of the output type; a second `$F64` temporary
;;;; is handed to the instruction as well.
(decl gen_fcvt_int (bool Reg bool Type Type) Reg)
(rule (gen_fcvt_int saturating src signed from_ty to_ty)
  (let ((rd WritableReg (temp_writable_reg to_ty))
        (scratch WritableReg (temp_writable_reg $F64))
        (_ Unit (emit (MInst.FcvtToInt saturating rd scratch src signed from_ty to_ty))))
    (writable_reg_to_reg rd)))

;;; Some float binary operations are performed on the raw bits:
;;; 1. move both operands into x registers,
;;; 2. apply the integer ALU operation,
;;; 3. move the result back into a float register.
(decl lower_float_binary (AluOPRRR Reg Reg Type) Reg)
(rule (lower_float_binary opcode lhs rhs ty)
  (let ((lhs_bits Reg (move_f_to_x lhs ty))
        (rhs_bits Reg (move_f_to_x rhs ty)))
    (move_x_to_f (alu_rrr opcode lhs_bits rhs_bits) ty)))

;;;; Bitwise NOT of a float: move the value into an x register, invert all
;;;; bits, and move the result back into a float register.
(decl lower_float_bnot (Reg Type) Reg)
(rule (lower_float_bnot src ty)
  (move_x_to_f
    (rv_not (move_f_to_x src ty))
    ty))


;;; Lowers an integer comparison. Operands narrower than 64 bits are
;;; widened first: sign-extended for signed condition codes, zero-extended
;;; otherwise.
(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg)
(rule 0 (lower_icmp cc lhs rhs ty)
  (gen_icmp cc
            (ext_int_if_need $false lhs ty)
            (ext_int_if_need $false rhs ty)
            ty))
(rule 1 (lower_icmp cc lhs rhs ty)
  (if (signed_cond_code cc))
  (gen_icmp cc
            (ext_int_if_need $true lhs ty)
            (ext_int_if_need $true rhs ty)
            ty))


;; 128-bit subtraction built from 64-bit operations.
(decl i128_sub (ValueRegs ValueRegs) ValueRegs)
(rule (i128_sub lhs rhs)
  (let (;; Low word difference.
        (lo Reg (rv_sub (value_regs_get lhs 0) (value_regs_get rhs 0)))
        ;; Borrow is 1 when the low subtraction wrapped, i.e. when the
        ;; original low word is (unsigned) less than the difference.
        (borrow Reg (rv_sltu (value_regs_get lhs 0) lo))
        ;; High word difference, then subtract the borrow.
        (hi_raw Reg (rv_sub (value_regs_get lhs 1) (value_regs_get rhs 1)))
        (hi Reg (rv_sub hi_raw borrow)))
    (value_regs lo hi)))


;;; Unsigned add with overflow detection.
;;; Returns the sum in the first register and the overflow test in the second.
(decl lower_uadd_overflow (Reg Reg Type) ValueRegs)

;; Narrow types: zero-extend both operands and add in 64 bits; any bits at
;; or above the type width in the sum indicate overflow.
(rule 0 (lower_uadd_overflow lhs rhs (fits_in_32 ty))
  (let ((lhs_wide Reg (ext_int_if_need $false lhs ty))
        (rhs_wide Reg (ext_int_if_need $false rhs ty))
        (total Reg (rv_add lhs_wide rhs_wide))
        (overflow Reg (rv_srli total (imm12_const (ty_bits ty)))))
    (value_regs total overflow)))

;; 64-bit: the addition overflowed iff the sum is (unsigned) less than one
;; of the operands.
(rule 1 (lower_uadd_overflow lhs rhs $I64)
  (let ((total Reg (rv_add lhs rhs))
        (overflow Reg (gen_icmp (IntCC.UnsignedLessThan) total lhs $I64)))
    (value_regs total overflow)))

;; Converts a `MachLabel` into a `BranchTarget`. Implemented externally.
(decl label_to_br_target (MachLabel) BranchTarget)
(extern constructor label_to_br_target label_to_br_target)

;; Builds (without emitting) an unconditional `MInst.Jal` to the given label.
(decl gen_jump (MachLabel) MInst)
(rule (gen_jump label)
  (MInst.Jal (label_to_br_target label)))

;; Returns the label at the given index of a `VecMachLabel`.
;; Implemented externally.
(decl vec_label_get (VecMachLabel u8) MachLabel )
(extern constructor vec_label_get vec_label_get)

;; Lowers a branch instruction given its target labels. Partial: only the
;; instruction shapes that have rules are handled.
(decl partial lower_branch (Inst VecMachLabel) Unit)

;; Unconditional jump: a single `jal` to the first target.
(rule (lower_branch (jump _) labels)
      (emit_side_effect
        (SideEffectNoResult.Inst
          (gen_jump (vec_label_get labels 0)))))

;;; Emits a branch on an integer comparison.
;;; Parameters are "cc a b targets Type". Implemented externally.
(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
(extern constructor lower_br_icmp lower_br_icmp)

;; Zero register(s) for an integer scalar of the given type.
;; Implemented externally.
(decl int_zero_reg (Type) ValueRegs)
(extern constructor int_zero_reg int_zero_reg)

;; Emits a conditional branch from a condition code, a value, the target
;; labels and a type. Implemented externally.
;; NOTE(review): call sites in this file pass `IntCC.NotEqual`, which reads
;; as a comparison of the value against zero — confirm.
(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit)
(extern constructor lower_cond_br lower_cond_br)

;; Maps an integer condition code to the `ExtendOp` to use for its
;; operands. Implemented externally.
;; NOTE(review): presumably signed condition codes map to
;; `ExtendOp.Signed` and the rest to `ExtendOp.Zero` — confirm.
(decl intcc_to_extend_op (IntCC) ExtendOp)
(extern constructor intcc_to_extend_op intcc_to_extend_op)

;; Normalize a value for comparison.
;;
;; Types smaller than a register may carry undefined high bits; extending
;; them to $I64 first ensures those bits do not leak into a full-register
;; comparison. $I64 and $I128 values are passed through untouched.
(decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs)

(rule 0 (normalize_cmp_value $I128 regs _) regs)
(rule 0 (normalize_cmp_value $I64 regs _) regs)

(rule 1 (normalize_cmp_value (fits_in_32 narrow_ty) regs kind)
      (extend regs kind narrow_ty $I64))

;; Prepares an integer operand for an int-to-float conversion: 8- and
;; 16-bit values are extended to $I64 with the requested extension; every
;; other type is passed through unchanged.
(decl normalize_fcvt_from_int (ValueRegs Type ExtendOp) ValueRegs)
(rule 1 (normalize_fcvt_from_int regs _ _)
  regs)
(rule 2 (normalize_fcvt_from_int regs (fits_in_16 ty) kind)
  (extend regs kind ty $I64))

;; Collapse a truthy value into a single register. The value may occupy more
;; than one register (an I128). Values narrower than 64 bits must already have
;; been masked by the caller (e.g. via `normalize_cmp_value`).
(decl truthy_to_reg (Type ValueRegs) Reg)

;; Up to 64 bits: the value lives entirely in register 0.
(rule 1 (truthy_to_reg (fits_in_64 _) val)
      (value_regs_get val 0))

;; I128: OR the low and high halves, so the result is non-zero iff either
;; half is.
(rule 0 (truthy_to_reg $I128 val)
      (rv_or (value_regs_get val 0) (value_regs_get val 1)))

;; Fallback for `brif` on an arbitrary value: normalize the value with a zero
;; extension, then branch with condition `NotEqual`.
(rule
  (lower_branch (brif cond_val @ (value_type ty) _ _) targets)
  (let ((normalized ValueRegs (normalize_cmp_value ty cond_val (ExtendOp.Zero))))
    (lower_cond_br (IntCC.NotEqual) normalized targets ty)))

;; Special case for I128: reify "value != 0" into a single register with a
;; 128-bit compare against a pair of zero registers, then branch on that
;; register as an I64.
(rule 2
  (lower_branch (brif cond_val @ (value_type $I128) _ _) targets)
  (let ((zero_pair ValueRegs (value_regs (zero_reg) (zero_reg)))
        (is_nonzero Reg (gen_icmp (IntCC.NotEqual) cond_val zero_pair $I128)))
    (lower_cond_br (IntCC.NotEqual) is_nonzero targets $I64)))

;; `brif` fed by an `icmp` (optionally through a uextend): hand the condition
;; code and both operands to `lower_br_icmp` instead of materializing the
;; compare result first.
(rule 1
  (lower_branch (brif (maybe_uextend (icmp cond lhs @ (value_type ty) rhs)) _ _) targets)
  (lower_br_icmp cond lhs rhs targets ty))

;; `brif` fed by an `fcmp` (optionally through a uextend).
;;
;; Case 1: `floatcc_unordered` reports `$true` for the condition. Evaluate the
;; inverse condition instead and swap the two branch targets, so the overall
;; branch behaviour is unchanged.
(rule 1
  (lower_branch (brif (maybe_uextend (fcmp cc lhs @ (value_type ty) rhs)) _ _) targets)
  (if-let $true (floatcc_unordered cc))
  (let ((taken BranchTarget (label_to_br_target (vec_label_get targets 0)))
        (not_taken BranchTarget (label_to_br_target (vec_label_get targets 1)))
        (cmp CmpResult (emit_fcmp (floatcc_inverse cc) ty lhs rhs)))
    (emit_side_effect (cond_br cmp not_taken taken))))

;; Case 2: `floatcc_unordered` reports `$false`. Evaluate the condition as
;; given and branch to the targets in their original order.
(rule 1
  (lower_branch (brif (maybe_uextend (fcmp cc lhs @ (value_type ty) rhs)) _ _) targets)
  (if-let $false (floatcc_unordered cc))
  (let ((taken BranchTarget (label_to_br_target (vec_label_get targets 0)))
        (not_taken BranchTarget (label_to_br_target (vec_label_get targets 1)))
        (cmp CmpResult (emit_fcmp cc ty lhs rhs)))
    (emit_side_effect (cond_br cmp taken not_taken))))

;;; Jump-table branch on an index register over the given targets.
;;; Implemented outside of ISLE (extern constructor).
(decl lower_br_table (Reg VecMachLabel) Unit)
(extern constructor lower_br_table lower_br_table)

;; `br_table`: forward the index value and the target list to the helper.
(rule
  (lower_branch (br_table idx _) targets)
  (lower_br_table idx targets))

;; Produce a `Reg` via the externally implemented `load_ra` helper.
;; NOTE(review): presumably the loaded return address -- confirm against the
;; Rust implementation.
(decl load_ra () Reg)
(extern constructor load_ra load_ra)

;; Generate a `rev8` (byte reversal, as named by the RISC-V Zbb extension)
;; of `src`.
(decl gen_rev8 (Reg) Reg)

;; `has_zbb` is true: use the `rv_rev8` instruction helper directly.
(rule 1
  (gen_rev8 src)
  (if-let $true (has_zbb))
  (rv_rev8 src))

;; `has_zbb` is false: emit the `MInst.Rev8` pseudo-instruction, which takes
;; the source plus three fresh temporaries (step, scratch and destination).
;; The destination temporary carries the result.
(rule
  (gen_rev8 src)
  (if-let $false (has_zbb))
  (let ((dst WritableReg (temp_writable_reg $I64))
        (scratch WritableReg (temp_writable_reg $I64))
        (step_reg WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.Rev8 src step_reg scratch dst))))
    (writable_reg_to_reg dst)))


;; Generate a `brev8` (bit reversal within each byte, as named by the RISC-V
;; Zbkb extension) of `src`. The type is only consulted by the fallback.
(decl gen_brev8 (Reg Type) Reg)

;; `has_zbkb` is true: use the `rv_brev8` instruction helper directly.
(rule 1
  (gen_brev8 src _)
  (if-let $true (has_zbkb))
  (rv_brev8 src))

;; `has_zbkb` is false: emit the `MInst.Brev8` pseudo-instruction, which takes
;; the source, the type and four fresh temporaries. The last temporary
;; (`dst`) carries the result.
(rule
  (gen_brev8 src ty)
  (if-let $false (has_zbkb))
  (let ((scratch_a WritableReg (temp_writable_reg $I64))
        (scratch_b WritableReg (temp_writable_reg $I64))
        (step_reg WritableReg (temp_writable_reg $I64))
        (dst WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.Brev8 src ty step_reg scratch_a scratch_b dst))))
    (writable_reg_to_reg dst)))

;; Arithmetic negation of a value, i.e. `0 - x`.
(decl neg (Type ValueRegs) ValueRegs)

;; Integer types of at most 64 bits: negate the single register.
(rule 1 (neg (fits_in_64 (ty_int _)) val)
  (let ((src Reg (value_regs_get val 0)))
    (value_reg (rv_neg src))))

;; I128: subtract the value from a 128-bit zero.
(rule 2 (neg $I128 val)
  (i128_sub (value_regs_zero) val))


;; Signed maximum of two registers, for integer types of at most 64 bits.
(decl max (Type Reg Reg) Reg)

;; `has_zbb` is true: use the `rv_max` instruction helper.
(rule (max (fits_in_64 (ty_int _)) lhs rhs)
  (if-let $true (has_zbb))
  (rv_max lhs rhs))

;; `has_zbb` is false: select `lhs` when `lhs >s rhs`, otherwise `rhs`.
(rule (max (fits_in_64 (ty_int _)) lhs rhs)
  (if-let $false (has_zbb))
  (gen_select_reg (IntCC.SignedGreaterThan) lhs rhs lhs rhs))


;; Integer absolute value.
(decl lower_iabs (Type Reg) Reg)

;; I64 and narrower. The intended code shape is:
;;   sext.{b,h,w} a0, a0
;;   neg a1, a0
;;   max a0, a0, a1
;; That is: sign-extend the input, negate it, and keep the larger of the two.
(rule (lower_iabs (fits_in_64 ty) val)
  (let ((widened Reg (ext_int_if_need $true val ty))
        (negated Reg (neg $I64 widened)))
    (max $I64 widened negated)))

;; Emit an `MInst.TrapIf` on the register `cond` with the given trap code, as
;; a side-effecting instruction without a result.
(decl gen_trapif (Reg TrapCode) InstOutput)
(rule
  (gen_trapif cond code)
  (let ((trap MInst (MInst.TrapIf cond code)))
    (side_effect (SideEffectNoResult.Inst trap))))

;; Emit an `MInst.TrapIfC` that compares `lhs` and `rhs` under the integer
;; condition `cond` and carries the given trap code. Note that the instruction
;; takes the two registers before the condition code.
(decl gen_trapifc (IntCC Reg Reg TrapCode) InstOutput)
(rule
  (gen_trapifc cond lhs rhs code)
  (let ((trap MInst (MInst.TrapIfC lhs rhs cond code)))
    (side_effect (SideEffectNoResult.Inst trap))))

;; Externally implemented helper taking a register and its type.
;; NOTE(review): judging by the name and its use in `gen_div_overflow`, it
;; presumably shifts the value left so that the type's top bit lands in bit 63
;; -- confirm against the Rust implementation.
(decl shift_int_to_most_significant (Reg Type) Reg)
(extern constructor shift_int_to_most_significant shift_int_to_most_significant)

;;; Emit the overflow check for a signed division.
;;;
;;; Traps with `IntegerOverflow` when both of these hold:
;;;   * the divisor `rs2` compares equal to -1, and
;;;   * the dividend `rs1`, after `shift_int_to_most_significant`, compares
;;;     equal to `1 << 63`.
(decl gen_div_overflow (Reg Reg Type) InstOutput)
(rule
  (gen_div_overflow rs1 rs2 ty)
  (let ((minus_one Reg (load_imm12 -1))
        (int_min Reg (rv_slli (load_imm12 1) (imm12_const 63)))
        (shifted_rs1 Reg (shift_int_to_most_significant rs1 ty))
        (divisor_is_m1 Reg (gen_icmp (IntCC.Equal) minus_one rs2 ty))
        (dividend_is_min Reg (gen_icmp (IntCC.Equal) int_min shifted_rs1 ty))
        (overflows Reg (rv_and divisor_is_m1 dividend_is_min)))
    (gen_trapif overflows (TrapCode.IntegerOverflow))))

;; Emit a trap with `IntegerDivisionByZero` when `divisor` compares equal to
;; the zero register.
(decl gen_div_by_zero (Reg) InstOutput)
(rule
  (gen_div_by_zero divisor)
  (gen_trapifc (IntCC.Equal)
               (zero_reg)
               divisor
               (TrapCode.IntegerDivisionByZero)))

;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Emit a direct call. Arguments, in order: the signature reference, the
;; callee's external name, its relocation distance, and the argument values.
;; Implemented outside of ISLE (extern constructor).
(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
(extern constructor gen_call gen_call)

;; Emit an indirect call. Arguments, in order: the signature reference, the
;; value holding the callee, and the argument values.
;; Implemented outside of ISLE (extern constructor).
(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput)
(extern constructor gen_call_indirect gen_call_indirect)

;;; Multiply-add modelled on the aarch64 `madd` instruction: computes
;;; `n * m + a` as a multiply followed by an add.
(decl madd (Reg Reg Reg) Reg)
(rule
  (madd n m a)
  (rv_add (rv_mul n m) a))

;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `bmask`. Arguments: output type, input type, input value.
(decl lower_bmask (Type Type ValueRegs) ValueRegs)

;; Input and output both fit in 64 bits.
;; Yields -1 when the input is non-zero and 0 otherwise. Inputs narrower than
;; 64 bits are zero-extended first so that stray high bits cannot influence
;; the test.
(rule
  0
  (lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val)
  (let ((normalized Reg (normalize_cmp_value in_ty val (ExtendOp.Zero)))
        (is_nonzero Reg (rv_snez normalized)))
    (value_reg (rv_neg is_nonzero))))

;; 128-bit input, output fits in 64 bits.
;; OR the two halves of the input together, then recurse as though the
;; combined register were a 64-bit input.
(rule
  1
  (lower_bmask (fits_in_64 ty) $I128 val)
  (let ((low Reg (value_regs_get val 0))
        (high Reg (value_regs_get val 1))
        (either Reg (rv_or low high)))
    (lower_bmask ty $I64 (value_reg either))))

;; Input fits in 64 bits, 128-bit output.
;; Compute the 64-bit mask and duplicate it into both result registers.
(rule
  2
  (lower_bmask $I128 (fits_in_64 in_ty) val)
  (let ((narrow ValueRegs (lower_bmask $I64 in_ty val))
        (mask Reg (value_regs_get narrow 0)))
    (value_regs mask mask)))

;; 128-bit input, 128-bit output.
;; Compute the mask of the 128-bit input as a 64-bit value and duplicate it
;; into both result registers.
(rule
  3
  (lower_bmask $I128 $I128 val)
  (let ((narrow ValueRegs (lower_bmask $I64 $I128 val))
        (mask Reg (value_regs_get narrow 0)))
    (value_regs mask mask)))


;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Copy a physical register into a fresh temporary by emitting
;; `MInst.MovFromPReg`, and return that temporary.
(decl gen_mov_from_preg (PReg) Reg)

(rule
  (gen_mov_from_preg src)
  (let ((dst WritableReg (temp_writable_reg $I64))
        (_ Unit (emit (MInst.MovFromPReg dst src))))
    (writable_reg_to_reg dst)))

;; Physical register returned by the externally implemented `fp_reg` helper
;; (by its name, the frame pointer).
(decl fp_reg () PReg)
(extern constructor fp_reg fp_reg)

;; Physical register returned by the externally implemented `sp_reg` helper
;; (by its name, the stack pointer).
(decl sp_reg () PReg)
(extern constructor sp_reg sp_reg)

;; Helper for creating the zero register, returned as a `Reg`.
;; Implemented outside of ISLE (extern constructor).
(decl zero_reg () Reg)
(extern constructor zero_reg zero_reg)

;; A two-register value whose halves are each produced by `(imm $I64 0)`,
;; i.e. a 128-bit zero.
(decl value_regs_zero () ValueRegs)
(rule (value_regs_zero)
  (let ((low Reg (imm $I64 0))
        (high Reg (imm $I64 0)))
    (value_regs low high)))

;; Writable form of the zero register, returned as a `WritableReg`.
;; Implemented outside of ISLE (extern constructor).
(decl writable_zero_reg () WritableReg)
(extern constructor writable_zero_reg writable_zero_reg)


;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Logical negation of a boolean register: XOR with the immediate 1. This
;; flips only the lowest bit, so the input must be exactly 0 or 1.
(decl not (Reg) Reg)
(rule (not val)
  (rv_xori val (imm_from_bits 1)))

;; Test that a float is not NaN by comparing it for equality with itself
;; (`feq x x`).
(decl is_not_nan (Type Reg) Reg)
(rule (is_not_nan ty val)
  (rv_feq ty val val))

;; Test that two floats are ordered, i.e. that neither operand is NaN.
(decl ordered (Type Reg Reg) Reg)
(rule (ordered ty lhs rhs)
  (let ((lhs_ok Reg (is_not_nan ty lhs))
        (rhs_ok Reg (is_not_nan ty rhs)))
    (rv_and lhs_ok rhs_ok)))

;; Outcome of a comparison. `result` is the register holding the computed
;; value; `invert` records whether that value must be read as negated
;; (`$true`) or as-is (`$false`).
(type CmpResult (enum
                  (Result
                    (result Reg)
                    (invert bool))))

;; Convenience constructor for the common case: wrap a register as a
;; comparison result that is *not* inverted.
(decl cmp_result (Reg) CmpResult)
(rule (cmp_result reg)
  (CmpResult.Result reg $false))

;; Convenience constructor for when the negated comparison is the easier one
;; to compute: wrap a register as an *inverted* comparison result.
(decl cmp_result_invert (Reg) CmpResult)
(rule (cmp_result_invert reg)
  (CmpResult.Result reg $true))

;; Turn a `CmpResult` into a conditional branch (`MInst.CondBr`) between two
;; targets. The result is a side-effect description; emitting it is left to
;; the caller.
(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult)
(rule (cond_br cmp taken not_taken)
      (let ((test IntegerCompare (cmp_integer_compare cmp)))
        (SideEffectNoResult.Inst
          (MInst.CondBr taken not_taken test))))

;; Construct an IntegerCompare value from a condition code and two registers.
;; Implemented outside of ISLE (extern constructor).
(decl int_compare (IntCC Reg Reg) IntegerCompare)
(extern constructor int_compare int_compare)

;; Convert a comparison result into the integer test used by a branch. The
;; result register is always compared against the zero register.
(decl cmp_integer_compare (CmpResult) IntegerCompare)

;; Not inverted: the branch is taken when the register is non-zero.
(rule
  (cmp_integer_compare (CmpResult.Result reg $false))
  (int_compare (IntCC.NotEqual) reg (zero_reg)))

;; Inverted: the branch is taken when the register is zero.
(rule
  (cmp_integer_compare (CmpResult.Result reg $true))
  (int_compare (IntCC.Equal) reg (zero_reg)))

;; Materialize a comparison result as a boolean register.
(decl cmp_value (CmpResult) Reg)

;; Not inverted: the register already holds the answer.
(rule (cmp_value (CmpResult.Result reg $false))
  reg)

;; Inverted: flip the register with `not`.
(rule (cmp_value (CmpResult.Result reg $true))
  (not reg))

;; Compare two floating point numbers under a `FloatCC` condition and produce
;; a `CmpResult`. Some conditions are computed through their negation and
;; flagged as inverted.
(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult)

;; Ordered: lhs is not NaN && rhs is not NaN.
(rule
  (emit_fcmp (FloatCC.Ordered) ty lhs rhs)
  (cmp_result (ordered ty lhs rhs)))

;; Unordered: lhs is NaN || rhs is NaN
;;   == !(ordered lhs rhs)
(rule
  (emit_fcmp (FloatCC.Unordered) ty lhs rhs)
  (cmp_result_invert (ordered ty lhs rhs)))

;; Equal: lhs == rhs.
(rule
  (emit_fcmp (FloatCC.Equal) ty lhs rhs)
  (cmp_result (rv_feq ty lhs rhs)))

;; NotEqual: lhs != rhs
;;   == !(lhs == rhs)
(rule
  (emit_fcmp (FloatCC.NotEqual) ty lhs rhs)
  (cmp_result_invert (rv_feq ty lhs rhs)))

;; OrderedNotEqual: lhs < rhs || lhs > rhs.
(rule
  (emit_fcmp (FloatCC.OrderedNotEqual) ty lhs rhs)
  (let ((is_lt Reg (rv_flt ty lhs rhs))
        (is_gt Reg (rv_fgt ty lhs rhs)))
    (cmp_result (rv_or is_lt is_gt))))

;; UnorderedOrEqual: !(ordered lhs rhs) || lhs == rhs.
(rule
  (emit_fcmp (FloatCC.UnorderedOrEqual) ty lhs rhs)
  (let ((is_unordered Reg (not (ordered ty lhs rhs)))
        (is_eq Reg (rv_feq ty lhs rhs)))
    (cmp_result (rv_or is_unordered is_eq))))

;; LessThan: lhs < rhs.
(rule
  (emit_fcmp (FloatCC.LessThan) ty lhs rhs)
  (cmp_result (rv_flt ty lhs rhs)))

;; LessThanOrEqual: lhs <= rhs.
(rule
  (emit_fcmp (FloatCC.LessThanOrEqual) ty lhs rhs)
  (cmp_result (rv_fle ty lhs rhs)))

;; GreaterThan: lhs > rhs.
(rule
  (emit_fcmp (FloatCC.GreaterThan) ty lhs rhs)
  (cmp_result (rv_fgt ty lhs rhs)))

;; GreaterThanOrEqual: lhs >= rhs.
(rule
  (emit_fcmp (FloatCC.GreaterThanOrEqual) ty lhs rhs)
  (cmp_result (rv_fge ty lhs rhs)))

;; UnorderedOrLessThan: !(ordered lhs rhs) || lhs < rhs
;;   == !(ordered lhs rhs && lhs >= rhs)
(rule
  (emit_fcmp (FloatCC.UnorderedOrLessThan) ty lhs rhs)
  (let ((is_ordered Reg (ordered ty lhs rhs))
        (is_ge Reg (rv_fge ty lhs rhs)))
    (cmp_result_invert (rv_and is_ordered is_ge))))

;; UnorderedOrLessThanOrEqual: !(ordered lhs rhs) || lhs <= rhs
;;   == !(ordered lhs rhs && lhs > rhs)
(rule
  (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty lhs rhs)
  (let ((is_ordered Reg (ordered ty lhs rhs))
        (is_gt Reg (rv_fgt ty lhs rhs)))
    (cmp_result_invert (rv_and is_ordered is_gt))))

;; UnorderedOrGreaterThan: !(ordered lhs rhs) || lhs > rhs
;;   == !(ordered lhs rhs && lhs <= rhs)
(rule
  (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty lhs rhs)
  (let ((is_ordered Reg (ordered ty lhs rhs))
        (is_le Reg (rv_fle ty lhs rhs)))
    (cmp_result_invert (rv_and is_ordered is_le))))

;; UnorderedOrGreaterThanOrEqual: !(ordered lhs rhs) || lhs >= rhs
;;   == !(ordered lhs rhs && lhs < rhs)
(rule
  (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty lhs rhs)
  (let ((is_ordered Reg (ordered ty lhs rhs))
        (is_lt Reg (rv_flt ty lhs rhs)))
    (cmp_result_invert (rv_and is_ordered is_lt))))