add riscv64 backend for cranelift. (#4271)

Add a RISC-V 64 (`riscv64`, RV64GC) backend.

Co-authored-by: yuyang <756445638@qq.com>
Co-authored-by: Chris Fallin <chris@cfallin.org>
Co-authored-by: Afonso Bordado <afonsobordado@az8.co>
Authored by yuyang-ok on 2022-09-28 08:30:31 +08:00; committed by GitHub.
parent 9715d91c50
commit cdecc858b4
182 changed files with 21024 additions and 36 deletions


@@ -233,6 +233,12 @@ jobs:
gcc: s390x-linux-gnu-gcc
qemu: qemu-s390x -L /usr/s390x-linux-gnu
qemu_target: s390x-linux-user
- os: ubuntu-latest
target: riscv64gc-unknown-linux-gnu
gcc_package: gcc-riscv64-linux-gnu
gcc: riscv64-linux-gnu-gcc
qemu: qemu-riscv64 -L /usr/riscv64-linux-gnu
qemu_target: riscv64-linux-user
steps:
- uses: actions/checkout@v2
with:
@@ -401,6 +407,9 @@ jobs:
- build: s390x-linux
os: ubuntu-latest
target: s390x-unknown-linux-gnu
- build: riscv64gc-linux
os: ubuntu-latest
target: riscv64gc-unknown-linux-gnu
steps:
- uses: actions/checkout@v2
with:


@@ -172,6 +172,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// FIXME: These tests fail under qemu due to a qemu bug.
(_, "simd_f32x4_pmin_pmax") if platform_is_s390x() => return true,
(_, "simd_f64x2_pmin_pmax") if platform_is_s390x() => return true,
// The riscv64 backend does not yet have complete SIMD support.
("simd", _) if platform_is_riscv64() => return true,
("memory64", "simd") if platform_is_riscv64() => return true,
_ => {}
},
_ => panic!("unrecognized strategy"),
@@ -183,3 +186,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
fn platform_is_s390x() -> bool {
env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "s390x"
}
fn platform_is_riscv64() -> bool {
env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "riscv64"
}


@@ -0,0 +1,7 @@
FROM ubuntu:22.04
RUN apt-get update -y && apt-get install -y gcc gcc-riscv64-linux-gnu ca-certificates
ENV PATH=$PATH:/rust/bin
ENV CARGO_BUILD_TARGET=riscv64gc-unknown-linux-gnu
ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc


@@ -68,7 +68,7 @@ unwind = ["gimli"]
x86 = []
arm64 = []
s390x = []
riscv64 = []
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []
@@ -77,7 +77,8 @@ experimental_x64 = []
all-arch = [
"x86",
"arm64",
"s390x"
"s390x",
"riscv64"
]
# For dependent crates that want to serialize some parts of cranelift


@@ -187,6 +187,8 @@ fn get_isle_compilations(
let src_isa_s390x =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("s390x"));
let src_isa_risc_v =
make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("riscv64"));
// This is a set of ISLE compilation units.
//
// The format of each entry is:
@@ -234,6 +236,16 @@ fn get_isle_compilations(
],
untracked_inputs: vec![clif_isle.clone()],
},
// The risc-v instruction selector.
IsleCompilation {
output: out_dir.join("isle_riscv64.rs"),
inputs: vec![
prelude_isle.clone(),
src_isa_risc_v.join("inst.isle"),
src_isa_risc_v.join("lower.isle"),
],
untracked_inputs: vec![clif_isle.clone()],
},
],
})
}


@@ -4,6 +4,7 @@ use crate::shared::Definitions as SharedDefinitions;
use std::fmt;
mod arm64;
mod riscv64;
mod s390x;
pub(crate) mod x86;
@@ -13,6 +14,7 @@ pub enum Isa {
X86,
Arm64,
S390x,
Riscv64,
}
impl Isa {
@@ -30,13 +32,14 @@ impl Isa {
"aarch64" => Some(Isa::Arm64),
"s390x" => Some(Isa::S390x),
x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
"riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64),
_ => None,
}
}
/// Returns all supported isa targets.
pub fn all() -> &'static [Isa] {
&[Isa::X86, Isa::Arm64, Isa::S390x]
&[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64]
}
}
@@ -47,6 +50,7 @@ impl fmt::Display for Isa {
Isa::X86 => write!(f, "x86"),
Isa::Arm64 => write!(f, "arm64"),
Isa::S390x => write!(f, "s390x"),
Isa::Riscv64 => write!(f, "riscv64"),
}
}
}
@@ -57,6 +61,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<T
Isa::X86 => x86::define(shared_defs),
Isa::Arm64 => arm64::define(shared_defs),
Isa::S390x => s390x::define(shared_defs),
Isa::Riscv64 => riscv64::define(shared_defs),
})
.collect()
}


@@ -0,0 +1,27 @@
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
use crate::shared::Definitions as SharedDefinitions;
fn define_settings(_shared: &SettingGroup) -> SettingGroup {
let mut setting = SettingGroupBuilder::new("riscv64");
let _has_m = setting.add_bool("has_m", "has extension M?", "", false);
let _has_a = setting.add_bool("has_a", "has extension A?", "", false);
let _has_f = setting.add_bool("has_f", "has extension F?", "", false);
let _has_d = setting.add_bool("has_d", "has extension D?", "", false);
let _has_v = setting.add_bool("has_v", "has extension V?", "", false);
let _has_b = setting.add_bool("has_b", "has extension B?", "", false);
let _has_c = setting.add_bool("has_c", "has extension C?", "", false);
let _has_zbkb = setting.add_bool("has_zbkb", "has extension zbkb?", "", false);
let _has_zicsr = setting.add_bool("has_zicsr", "has extension zicsr?", "", false);
let _has_zifencei = setting.add_bool("has_zifencei", "has extension zifencei?", "", false);
setting.build()
}
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
let settings = define_settings(&shared_defs.settings);
TargetIsa::new("riscv64", settings)
}
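These booleans become members of the generated `riscv64` settings group, so an embedder can opt into individual extensions by name. A minimal sketch (illustrative only, assuming the ISA builder exposes these flags through the usual `settings::Configurable` interface, as the other backends do):

use cranelift_codegen::isa;
use cranelift_codegen::settings::{self, Configurable, Flags};
use target_lexicon::triple;

fn main() {
    let mut isa_builder = isa::lookup(triple!("riscv64")).expect("riscv64 backend enabled");
    // Flag names match the `add_bool` calls above.
    isa_builder.enable("has_m").expect("known flag");
    isa_builder.enable("has_c").expect("known flag");
    let _isa = isa_builder
        .finish(Flags::new(settings::builder()))
        .expect("valid flags");
}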


@@ -66,6 +66,13 @@ pub enum Reloc {
/// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
Aarch64TlsGdAddLo12Nc,
/// RISC-V call to a symbol; expands to the following assembly and relocation:
/// auipc ra, 0
/// jalr ra, ra, 0
RiscvCall,
/// s390x TLS GD64 - 64-bit offset of tls_index for GD symbol in GOT
S390xTlsGd64,
/// s390x TLS GDCall - marker to enable optimization of TLS calls
@@ -87,6 +94,7 @@ impl fmt::Display for Reloc {
Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
Self::X86SecRel => write!(f, "SecRel"),
Self::Arm32Call | Self::Arm64Call => write!(f, "Call"),
Self::RiscvCall => write!(f, "RiscvCall"),
Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"),
Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),


@@ -66,6 +66,9 @@ pub mod x64;
#[cfg(feature = "arm64")]
pub(crate) mod aarch64;
#[cfg(feature = "riscv64")]
pub mod riscv64;
#[cfg(feature = "s390x")]
mod s390x;
@@ -97,6 +100,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
}
Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple),
_ => Err(LookupError::Unsupported),
}
}
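With the new match arm above, any riscv64 triple now resolves to the riscv64 backend builder. A minimal sketch of selecting it from the embedder side (illustrative; assumes the `riscv64` cargo feature from Cargo.toml is enabled, and uses the same `triple!` macro as the unwind tests later in this commit):

use cranelift_codegen::isa;
use cranelift_codegen::settings::{self, Flags};
use target_lexicon::triple;

fn main() {
    // Look up the backend builder for a riscv64 target...
    let builder = isa::lookup(triple!("riscv64")).expect("riscv64 backend enabled");
    // ...and turn it into a TargetIsa with default shared flags.
    let isa = builder
        .finish(Flags::new(settings::builder()))
        .expect("valid flags");
    println!("selected backend for {}", isa.triple());
}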


@@ -0,0 +1,716 @@
//! Implementation of a standard Riscv64 ABI.
use crate::ir;
use crate::ir::types::*;
use crate::ir::ExternalName;
use crate::ir::MemFlags;
use crate::isa;
use crate::isa::riscv64::{inst::EmitState, inst::*};
use crate::isa::CallConv;
use crate::machinst::*;
use crate::ir::types::I8;
use crate::ir::LibCall;
use crate::ir::Signature;
use crate::isa::riscv64::settings::Flags as RiscvFlags;
use crate::isa::unwind::UnwindInst;
use crate::settings;
use crate::CodegenError;
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc2::PRegSet;
use regs::x_reg;
use smallvec::{smallvec, SmallVec};
/// Support for the Riscv64 ABI from the callee side (within a function body).
pub(crate) type Riscv64Callee = Callee<Riscv64MachineDeps>;
/// Support for the Riscv64 ABI from the caller side (at a callsite).
pub(crate) type Riscv64ABICaller = Caller<Riscv64MachineDeps>;
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
/// Riscv64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct Riscv64MachineDeps;
impl IsaFlags for RiscvFlags {}
impl ABIMachineSpec for Riscv64MachineDeps {
type I = Inst;
type F = RiscvFlags;
fn word_bits() -> u32 {
64
}
/// Return required stack alignment in bytes.
fn stack_align(_call_conv: isa::CallConv) -> u32 {
16
}
fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
// Ranges of registers that can be used for parameters or return values;
// both the start and end indices are inclusive.
let (x_start, x_end, f_start, f_end) = if args_or_rets == ArgsOrRets::Args {
(10, 17, 10, 17)
} else {
let end = if call_conv.extends_wasmtime() { 10 } else { 11 };
(10, end, 10, end)
};
let mut next_x_reg = x_start;
let mut next_f_reg = f_start;
// Stack space.
let mut next_stack: u64 = 0;
let mut ret = smallvec![];
let mut return_one_register_used = false;
for param in params {
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size as u64;
ret.push(ABIArg::StructArg {
pointer: None,
offset: offset as i64,
size: size as u64,
purpose: param.purpose,
});
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
let mut slots = ABIArgSlotVec::new();
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let next_reg =
if (next_x_reg <= x_end) && *rc == RegClass::Int && !return_one_register_used {
let x = Some(x_reg(next_x_reg));
if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
return_one_register_used = true;
}
next_x_reg += 1;
x
} else if (next_f_reg <= f_end)
&& *rc == RegClass::Float
&& !return_one_register_used
{
let x = Some(f_reg(next_f_reg));
if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
return_one_register_used = true;
}
next_f_reg += 1;
x
} else {
None
};
if let Some(reg) = next_reg {
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg().unwrap(),
ty: *reg_ty,
extension: param.extension,
});
} else {
// Compute size. For the wasmtime ABI it differs from native
// ABIs in how multiple values are returned, so we take a
// leaf out of arm64's book by not rounding everything up to
// 8 bytes. For all ABI arguments, and other ABI returns,
// though, each slot takes a minimum of 8 bytes.
//
// Note that in all cases 16-byte stack alignment happens
// separately after all args.
let size = (reg_ty.bits() / 8) as u64;
let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
size
} else {
std::cmp::max(size, 8)
};
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
slots.push(ABIArgSlot::Stack {
offset: next_stack as i64,
ty: *reg_ty,
extension: param.extension,
});
next_stack += size;
}
}
ret.push(ABIArg::Slots {
slots,
purpose: param.purpose,
});
}
let pos: Option<usize> = if add_ret_area_ptr {
assert!(ArgsOrRets::Args == args_or_rets);
if next_x_reg <= x_end {
let arg = ABIArg::reg(
x_reg(next_x_reg).to_real_reg().unwrap(),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
);
ret.push(arg);
} else {
let arg = ABIArg::stack(
next_stack as i64,
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
);
ret.push(arg);
next_stack += 8;
}
Some(ret.len() - 1)
} else {
None
};
next_stack = align_to(next_stack, Self::stack_align(call_conv) as u64);
// To avoid overflow issues, limit the arg/return size to something
// reasonable -- here, 128 MB.
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
return Err(CodegenError::ImplLimitExceeded);
}
CodegenResult::Ok((ret, next_stack as i64, pos))
}
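// Illustrative example (not part of the original commit): for a SystemV
// signature `fn(i64, f64, i32) -> i64`, the loop above assigns x10 to the
// first argument, f10 to the second and x11 to the third; nothing goes to the
// stack, so `next_stack` remains 0 after the final 16-byte alignment.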
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
// 16 bytes: the saved LR (ra) and FP pair sits between FP and the incoming arguments.
16
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
}
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
}
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
Inst::gen_move(to_reg, from_reg, ty)
}
fn gen_extend(
to_reg: Writable<Reg>,
from_reg: Reg,
signed: bool,
from_bits: u8,
to_bits: u8,
) -> Inst {
assert!(from_bits < to_bits);
Inst::Extend {
rd: to_reg,
rn: from_reg,
signed,
from_bits,
to_bits,
}
}
fn get_ext_mode(
_call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
specified
}
fn gen_args(_isa_flags: &crate::isa::riscv64::settings::Flags, args: Vec<ArgPair>) -> Inst {
Inst::Args { args }
}
fn gen_ret(_setup_frame: bool, _isa_flags: &Self::F, rets: Vec<Reg>) -> Inst {
Inst::Ret { rets }
}
fn get_stacklimit_reg() -> Reg {
spilltmp_reg()
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: into_reg,
rs: from_reg,
imm12,
});
} else {
insts.extend(Inst::load_constant_u32(
writable_spilltmp_reg2(),
imm as u64,
));
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd: into_reg,
rs1: spilltmp_reg2(),
rs2: from_reg,
});
}
insts
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::TrapIfC {
cc: IntCC::UnsignedLessThan,
rs1: stack_reg(),
rs2: limit_reg,
trap_code: ir::TrapCode::StackOverflow,
});
insts
}
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
Inst::LoadAddr {
rd: into_reg,
mem: mem.into(),
}
}
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
}
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
}
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
if amount == 0 {
return insts;
}
insts.push(Inst::AjustSp {
amount: amount as i64,
});
insts
}
fn gen_nominal_sp_adj(offset: i32) -> Inst {
Inst::VirtualSPOffsetAdj {
amount: offset as i64,
}
}
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
// add sp,sp,-16 ;; alloc stack space for fp.
// sd ra,8(sp) ;; save ra.
// sd fp,0(sp) ;; store old fp.
// mv fp,sp ;; set fp to sp.
let mut insts = SmallVec::new();
insts.push(Inst::AjustSp { amount: -16 });
insts.push(Self::gen_store_stack(
StackAMode::SPOffset(8, I64),
link_reg(),
I64,
));
insts.push(Self::gen_store_stack(
StackAMode::SPOffset(0, I64),
fp_reg(),
I64,
));
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
insts.push(Inst::Mov {
rd: writable_fp_reg(),
rm: stack_reg(),
ty: I64,
});
insts
}
/// reverse of gen_prologue_frame_setup.
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Self::gen_load_stack(
StackAMode::SPOffset(8, I64),
writable_link_reg(),
I64,
));
insts.push(Self::gen_load_stack(
StackAMode::SPOffset(0, I64),
writable_fp_reg(),
I64,
));
insts.push(Inst::AjustSp { amount: 16 });
insts
}
fn gen_probestack(frame_size: u32) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: smallvec![CallArgPair {
vreg: a0(),
preg: a0(),
}],
defs: smallvec![],
clobbers: PRegSet::empty(),
opcode: Opcode::Call,
callee_callconv: CallConv::SystemV,
caller_callconv: CallConv::SystemV,
}),
});
insts
}
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
_call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
// Adjust the stack pointer downward for clobbers and the function fixed
// frame (spillslots and storage slots).
let stack_size = fixed_frame_storage_size + clobbered_size;
if flags.unwind_info() && setup_frame {
// The *unwind* frame (but not the actual frame) starts at the
// clobbers, just below the saved FP/LR pair.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobbered_size,
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
// Store each clobbered register in order at offsets from SP,
// placing them above the fixed frame slots.
if stack_size > 0 {
// Since we use a frame pointer, we don't need UnwindInst::StackAlloc.
let mut cur_offset = 8;
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let ty = match r_reg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: clobbered_size - cur_offset,
reg: r_reg,
},
});
}
insts.push(Self::gen_store_stack(
StackAMode::SPOffset(-(cur_offset as i64), ty),
real_reg_to_reg(reg.to_reg()),
ty,
));
cur_offset += 8
}
insts.push(Inst::AjustSp {
amount: -(stack_size as i64),
});
}
(clobbered_size as u64, insts)
}
fn gen_clobber_restore(
call_conv: isa::CallConv,
sig: &Signature,
_flags: &settings::Flags,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_callee_saves =
Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);
if stack_size > 0 {
insts.push(Inst::AjustSp {
amount: stack_size as i64,
});
}
let mut cur_offset = 8;
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
let ty = match rreg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
};
insts.push(Self::gen_load_stack(
StackAMode::SPOffset(-cur_offset, ty),
Writable::from_reg(real_reg_to_reg(reg.to_reg())),
ty,
));
cur_offset += 8
}
insts
}
fn gen_call(
dest: &CallDest,
uses: CallArgList,
defs: CallRetList,
clobbers: PRegSet,
opcode: ir::Opcode,
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
caller_conv: isa::CallConv,
) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
match &dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: name.clone(),
uses,
defs,
clobbers,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
}),
&CallDest::ExtName(ref name, RelocDistance::Far) => {
insts.push(Inst::LoadExtName {
rd: tmp,
name: Box::new(name.clone()),
offset: 0,
});
insts.push(Inst::CallInd {
info: Box::new(CallIndInfo {
rn: tmp.to_reg(),
uses,
defs,
clobbers,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
});
}
&CallDest::Reg(reg) => insts.push(Inst::CallInd {
info: Box::new(CallIndInfo {
rn: *reg,
uses,
defs,
clobbers,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
}),
}
insts
}
fn gen_memcpy(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
size: usize,
) -> SmallVec<[Self::I; 8]> {
let mut insts = SmallVec::new();
let arg0 = Writable::from_reg(x_reg(10));
let arg1 = Writable::from_reg(x_reg(11));
let arg2 = Writable::from_reg(x_reg(12));
insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Memcpy),
uses: smallvec![
CallArgPair {
vreg: dst,
preg: arg0.to_reg()
},
CallArgPair {
vreg: src,
preg: arg1.to_reg()
},
CallArgPair {
vreg: tmp.to_reg(),
preg: arg2.to_reg()
}
],
defs: smallvec![],
clobbers: Self::get_regs_clobbered_by_call(call_conv),
opcode: Opcode::Call,
caller_callconv: call_conv,
callee_callconv: call_conv,
}),
});
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, _target_vector_bytes: u32) -> u32 {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::Int => 1,
RegClass::Float => 1,
}
}
/// Get the current virtual-SP offset from an instruction-emission state.
fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
s.virtual_sp_offset
}
/// Get the nominal-SP-to-FP offset from an instruction-emission state.
fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
s.nominal_sp_to_fp
}
fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
let mut v = PRegSet::empty();
for (k, need_save) in CALLER_SAVE_X_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(px_reg(k));
}
for (k, need_save) in CALLER_SAVE_F_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(pf_reg(k));
}
v
}
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
_flags: &settings::Flags,
_sig: &Signature,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = regs
.iter()
.cloned()
.filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
.collect();
regs.sort();
regs
}
fn is_frame_setup_needed(
is_leaf: bool,
stack_args_size: u32,
num_clobbered_callee_saves: usize,
fixed_frame_storage_size: u32,
) -> bool {
!is_leaf
// The function arguments that are passed on the stack are addressed
// relative to the Frame Pointer.
|| stack_args_size > 0
|| num_clobbered_callee_saves > 0
|| fixed_frame_storage_size > 0
}
fn gen_inline_probestack(frame_size: u32, guard_size: u32) -> SmallInstVec<Self::I> {
// Unroll at most n consecutive probes, before falling back to using a loop
const PROBE_MAX_UNROLL: u32 = 3;
// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;
if probe_count <= PROBE_MAX_UNROLL {
Self::gen_probestack_unroll(guard_size, probe_count)
} else {
Self::gen_probestack_loop(guard_size, probe_count)
}
}
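// Illustrative example: with a 4 KiB guard size and a 10 KiB frame,
// `probe_count` is align_to(10240, 4096) / 4096 = 3, which is within
// PROBE_MAX_UNROLL, so three unrolled stores to sp-4096, sp-8192 and
// sp-12288 are emitted; larger frames fall back to the loop form.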
}
const CALLER_SAVE_X_REG: [bool; 32] = [
false, true, false, false, false, true, true, true, // 0-7
false, false, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_X_REG: [bool; 32] = [
false, false, true, false, false, false, false, false, // 0-7
true, true, false, false, false, false, false, false, // 8-15
false, false, true, true, true, true, true, true, // 16-23
true, true, true, true, false, false, false, false, // 24-31
];
const CALLER_SAVE_F_REG: [bool; 32] = [
true, true, true, true, true, true, true, true, // 0-7
false, true, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_F_REG: [bool; 32] = [
false, false, false, false, false, false, false, false, // 0-7
true, false, false, false, false, false, false, false, // 8-15
false, false, true, true, true, true, true, true, // 16-23
true, true, true, true, false, false, false, false, // 24-31
];
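// These tables encode the standard RV64 calling convention: ra (x1), t0-t2
// (x5-x7), a0-a7 (x10-x17) and t3-t6 (x28-x31) are caller-saved, while sp
// (x2), s0/fp (x8), s1 (x9) and s2-s11 (x18-x27) are marked callee-saved; the
// float tables follow the same split, with ft0-ft11 and fa0-fa7 caller-saved
// and fs0-fs11 callee-saved.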
/// Returns whether this register must be saved by the callee (i.e. is callee-saved).
#[inline]
fn is_reg_saved_in_prologue(_conv: CallConv, reg: RealReg) -> bool {
if reg.class() == RegClass::Int {
CALLEE_SAVE_X_REG[reg.hw_enc() as usize]
} else {
CALLEE_SAVE_F_REG[reg.hw_enc() as usize]
}
}
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().class() {
RegClass::Int => {
clobbered_size += 8;
}
RegClass::Float => {
clobbered_size += 8;
}
}
}
align_to(clobbered_size, 16)
}
impl Riscv64MachineDeps {
fn gen_probestack_unroll(guard_size: u32, probe_count: u32) -> SmallInstVec<Inst> {
let mut insts = SmallVec::with_capacity(probe_count as usize);
for i in 0..probe_count {
let offset = (guard_size * (i + 1)) as i64;
insts.push(Self::gen_store_stack(
StackAMode::SPOffset(-offset, I8),
zero_reg(),
I32,
));
}
insts
}
fn gen_probestack_loop(guard_size: u32, probe_count: u32) -> SmallInstVec<Inst> {
smallvec![Inst::StackProbeLoop {
guard_size,
probe_count,
tmp: Writable::from_reg(x_reg(28)), // t3
}]
}
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,218 @@
//! Riscv64 ISA definitions: immediate constants.
// Some variants are never constructed, but we still want them as options in the future.
use super::Inst;
#[allow(dead_code)]
use std::fmt::{Debug, Display, Formatter, Result};
#[derive(Copy, Clone, Debug, Default)]
pub struct Imm12 {
pub bits: i16,
}
impl Imm12 {
pub(crate) const FALSE: Self = Self { bits: 0 };
pub(crate) const TRUE: Self = Self { bits: -1 };
pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
let sign_bit = 1 << 11;
if val == 0 {
Some(Imm12 { bits: 0 })
} else if (val & sign_bit) != 0 && (val >> 12) == 0xffff_ffff_ffff_f {
Some(Imm12 {
bits: (val & 0xffff) as i16,
})
} else if (val & sign_bit) == 0 && (val >> 12) == 0 {
Some(Imm12 {
bits: (val & 0xffff) as i16,
})
} else {
None
}
}
#[inline]
pub fn from_bits(bits: i16) -> Self {
Self { bits: bits & 0xfff }
}
/// Create a zero immediate of this format.
#[inline]
pub fn zero() -> Self {
Imm12 { bits: 0 }
}
#[inline]
pub fn as_i16(self) -> i16 {
self.bits
}
#[inline]
pub fn as_u32(&self) -> u32 {
(self.bits as u32) & 0xfff
}
}
impl Into<i64> for Imm12 {
fn into(self) -> i64 {
self.bits as i64
}
}
impl Display for Imm12 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{:+}", self.bits)
}
}
impl std::ops::Neg for Imm12 {
type Output = Self;
fn neg(self) -> Self::Output {
Self { bits: -self.bits }
}
}
// Signed 20-bit immediate.
#[derive(Clone, Copy, Default)]
pub struct Imm20 {
/// The immediate bits.
pub bits: i32,
}
impl Imm20 {
#[inline]
pub fn from_bits(bits: i32) -> Self {
Self {
bits: bits & 0xf_ffff,
}
}
#[inline]
pub fn as_u32(&self) -> u32 {
(self.bits as u32) & 0xf_ffff
}
}
impl Debug for Imm20 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
}
}
impl Display for Imm20 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
}
}
#[derive(Clone, Copy)]
pub struct Uimm5 {
bits: u8,
}
impl Uimm5 {
pub fn from_bits(bits: u8) -> Self {
Self { bits }
}
/// Create a zero immediate of this format.
pub fn zero() -> Self {
Self { bits: 0 }
}
pub fn as_u32(&self) -> u32 {
(self.bits as u32) & 0b1_1111
}
}
impl Debug for Uimm5 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
}
}
impl Display for Uimm5 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
}
}
impl Inst {
pub(crate) fn imm_min() -> i64 {
let imm20_max: i64 = (1 << 19) << 12;
let imm12_max = 1 << 11;
-imm20_max - imm12_max
}
pub(crate) fn imm_max() -> i64 {
let imm20_max: i64 = ((1 << 19) - 1) << 12;
let imm12_max = (1 << 11) - 1;
imm20_max + imm12_max
}
/// A 32-bit immediate can be materialized from an Imm20 (the upper 20 bits,
/// as loaded by `lui`/`auipc`) plus an Imm12 (the low 12 bits). This helper
/// produces the imm12, the imm20, or both, as needed to generate `value`.
///
/// `value` must be between `imm_min()` and `imm_max()`, or else
/// this helper returns `None`.
pub(crate) fn generate_imm<R>(
value: u64,
mut handle_imm: impl FnMut(Option<Imm20>, Option<Imm12>) -> R,
) -> Option<R> {
if let Some(imm12) = Imm12::maybe_from_u64(value) {
// Can be loaded using a single imm12.
let r = handle_imm(None, Some(imm12));
return Some(r);
}
let value = value as i64;
if !(value >= Self::imm_min() && value <= Self::imm_max()) {
// not in range, return None.
return None;
}
const MOD_NUM: i64 = 4096;
let (imm20, imm12) = if value > 0 {
let mut imm20 = value / MOD_NUM;
let mut imm12 = value % MOD_NUM;
if imm12 >= 2048 {
imm12 -= MOD_NUM;
imm20 += 1;
}
assert!(imm12 >= -2048 && imm12 <= 2047);
(imm20, imm12)
} else {
// this is the abs value.
let value_abs = value.abs();
let imm20 = value_abs / MOD_NUM;
let imm12 = value_abs % MOD_NUM;
let mut imm20 = -imm20;
let mut imm12 = -imm12;
if imm12 < -2048 {
imm12 += MOD_NUM;
imm20 -= 1;
}
(imm20, imm12)
};
assert!(imm20 >= -(0x7_ffff + 1) && imm20 <= 0x7_ffff);
assert!(imm20 != 0 || imm12 != 0);
Some(handle_imm(
if imm20 != 0 {
Some(Imm20::from_bits(imm20 as i32))
} else {
None
},
if imm12 != 0 {
Some(Imm12::from_bits(imm12 as i16))
} else {
None
},
))
}
}
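// Worked example (illustrative): generate_imm(0x12345) yields imm20 = 0x12 and
// imm12 = 0x345, since 0x12 << 12 | 0x345 == 0x12345; generate_imm(0x12fff)
// instead yields imm20 = 0x13 and imm12 = -1, because the low 12 bits (0xfff)
// exceed 2047 and must borrow from the upper part.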
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_imm12() {
let x = Imm12::zero();
assert_eq!(0, x.as_u32());
Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap();
}
#[test]
fn imm20_and_imm12() {
assert!(Inst::imm_max() == (i32::MAX - 2048) as i64);
assert!(Inst::imm_min() == i32::MIN as i64 - 2048);
}
}

File diff suppressed because it is too large.


@@ -0,0 +1,220 @@
//! Riscv64 ISA definitions: registers.
//!
use crate::settings;
use crate::machinst::{Reg, Writable};
use crate::machinst::RealReg;
use alloc::vec;
use alloc::vec::Vec;
use regalloc2::VReg;
use regalloc2::{MachineEnv, PReg, RegClass};
// first argument of function call
#[inline]
pub fn a0() -> Reg {
x_reg(10)
}
// second argument of function call
#[inline]
pub fn a1() -> Reg {
x_reg(11)
}
// third argument of function call
#[inline]
pub fn a2() -> Reg {
x_reg(12)
}
#[inline]
pub fn writable_a0() -> Writable<Reg> {
Writable::from_reg(a0())
}
#[inline]
pub fn writable_a1() -> Writable<Reg> {
Writable::from_reg(a1())
}
#[inline]
pub fn writable_a2() -> Writable<Reg> {
Writable::from_reg(a2())
}
#[inline]
pub fn fa0() -> Reg {
f_reg(10)
}
#[inline]
pub fn writable_fa0() -> Writable<Reg> {
Writable::from_reg(fa0())
}
#[inline]
pub fn writable_fa1() -> Writable<Reg> {
Writable::from_reg(fa1())
}
#[inline]
pub fn fa1() -> Reg {
f_reg(11)
}
#[inline]
pub fn fa7() -> Reg {
f_reg(17)
}
/// Get a reference to the zero-register.
#[inline]
pub fn zero_reg() -> Reg {
x_reg(0)
}
/// Get a writable reference to the zero-register (this discards a result).
#[inline]
pub fn writable_zero_reg() -> Writable<Reg> {
Writable::from_reg(zero_reg())
}
#[inline]
pub fn stack_reg() -> Reg {
x_reg(2)
}
/// Get a writable reference to the stack-pointer register.
#[inline]
pub fn writable_stack_reg() -> Writable<Reg> {
Writable::from_reg(stack_reg())
}
/// Get a reference to the link register (x1).
pub fn link_reg() -> Reg {
x_reg(1)
}
/// Get a writable reference to the link register.
#[inline]
pub fn writable_link_reg() -> Writable<Reg> {
Writable::from_reg(link_reg())
}
/// Get a reference to the frame pointer (x8).
#[inline]
pub fn fp_reg() -> Reg {
x_reg(8)
}
/// Get a writable reference to the frame pointer.
#[inline]
pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}
/// Get a reference to the first temporary, sometimes "spill temporary",
/// register. This register is used in various ways as a temporary.
#[inline]
pub fn spilltmp_reg() -> Reg {
x_reg(31)
}
/// Get a writable reference to the spilltmp reg.
#[inline]
pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Get a reference to the second spill temporary register.
#[inline]
pub fn spilltmp_reg2() -> Reg {
x_reg(30)
}
/// Get a writable reference to the spilltmp2 reg.
#[inline]
pub fn writable_spilltmp_reg2() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg2())
}
pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv {
let preferred_regs_by_class: [Vec<PReg>; 2] = {
let mut x_register: Vec<PReg> = vec![];
x_register.push(PReg::new(5, RegClass::Int));
for i in 6..=7 {
x_register.push(PReg::new(i, RegClass::Int));
}
for i in 10..=17 {
x_register.push(PReg::new(i, RegClass::Int));
}
for i in 28..=29 {
x_register.push(PReg::new(i, RegClass::Int));
}
let mut f_register: Vec<PReg> = vec![];
for i in 0..=7 {
f_register.push(PReg::new(i, RegClass::Float));
}
for i in 10..=17 {
f_register.push(PReg::new(i, RegClass::Float));
}
for i in 28..=31 {
f_register.push(PReg::new(i, RegClass::Float));
}
[x_register, f_register]
};
let non_preferred_regs_by_class: [Vec<PReg>; 2] = {
let mut x_register: Vec<PReg> = vec![];
x_register.push(PReg::new(9, RegClass::Int));
for i in 18..=27 {
x_register.push(PReg::new(i, RegClass::Int));
}
let mut f_register: Vec<PReg> = vec![];
for i in 8..=9 {
f_register.push(PReg::new(i, RegClass::Float));
}
for i in 18..=27 {
f_register.push(PReg::new(i, RegClass::Float));
}
[x_register, f_register]
};
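// Note: x0 (zero), x1 (ra), x2 (sp), x3 (gp), x4 (tp), x8 (fp) and the spill
// temporaries x30/x31 appear in neither list, so the register allocator never
// hands them out.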
MachineEnv {
preferred_regs_by_class,
non_preferred_regs_by_class,
fixed_stack_slots: vec![],
}
}
#[inline]
pub fn x_reg(enc: usize) -> Reg {
let p_reg = PReg::new(enc, RegClass::Int);
let v_reg = VReg::new(p_reg.index(), p_reg.class());
Reg::from(v_reg)
}
pub fn px_reg(enc: usize) -> PReg {
PReg::new(enc, RegClass::Int)
}
#[inline]
pub fn f_reg(enc: usize) -> Reg {
let p_reg = PReg::new(enc, RegClass::Float);
let v_reg = VReg::new(p_reg.index(), p_reg.class());
Reg::from(v_reg)
}
pub const fn pf_reg(enc: usize) -> PReg {
PReg::new(enc, RegClass::Float)
}
#[inline]
pub(crate) fn real_reg_to_reg(x: RealReg) -> Reg {
let v_reg = VReg::new(x.hw_enc() as usize, x.class());
Reg::from(v_reg)
}
#[allow(dead_code)]
pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
let mut regs = vec![];
for i in start..=end {
regs.push(Writable::from_reg(x_reg(i)));
}
regs
}


@@ -0,0 +1,2 @@
#[cfg(feature = "unwind")]
pub(crate) mod systemv;


@@ -0,0 +1,173 @@
//! Unwind information for System V ABI (Riscv64).
use crate::isa::riscv64::inst::regs;
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::Reg;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
use regalloc2::RegClass;
/// Creates a new riscv64 common information entry (CIE).
pub fn create_cie() -> CommonInformationEntry {
use gimli::write::CallFrameInstruction;
let mut entry = CommonInformationEntry::new(
Encoding {
address_size: 8,
format: Format::Dwarf32,
version: 1,
},
4, // Code alignment factor
-8, // Data alignment factor
Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16),
);
// Every frame will start with the call frame address (CFA) at SP
let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into());
entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
entry
}
/// Map Cranelift registers to their corresponding Gimli registers.
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
match reg.class() {
RegClass::Int => {
let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
Ok(Register(reg))
}
RegClass::Float => {
let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
Ok(Register(32 + reg))
}
}
}
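// This follows the RISC-V DWARF register numbering: 0-31 for the integer
// registers x0-x31 and 32-63 for the floating-point registers f0-f31.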
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
regs::stack_reg().to_real_reg().unwrap().hw_enc() as u16
}
fn fp(&self) -> Option<u16> {
Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16)
}
fn lr(&self) -> Option<u16> {
Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16)
}
fn lr_offset(&self) -> Option<u32> {
Some(8)
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind,
UserFuncName,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("riscv64"))
.expect("expect riscv64 ISA")
.finish(Flags::new(builder()))
.expect("Creating compiler backend");
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }");
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.sized_stack_slots.push(stack_slot);
}
func
}
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("riscv64"))
.expect("expect riscv64 ISA")
.finish(Flags::new(builder()))
.expect("Creating compiler backend");
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(
format!("{:?}", fde),
"FrameDescriptionEntry { address: Constant(4321), length: 12, lsda: None, instructions: [] }"
);
}
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
}
}


@@ -0,0 +1,983 @@
;; riscv64 instruction selection and CLIF-to-MachInst lowering.
;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl lower (Inst) InstOutput)
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
(imm ty n))
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (bconst $false)))
(imm ty 0))
(rule (lower (has_type ty (bconst $true)))
(imm ty 1))
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (null)))
(imm ty 0))
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 ty) (iadd x y)))
(alu_rrr (AluOPRRR.Addw) x y))
;; Base case, simply adding things in registers.
(rule (lower (has_type (fits_in_64 ty) (iadd x y)))
(alu_add x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
(alu_rr_imm12 (select_addi ty) x y))
(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
(alu_rr_imm12 (select_addi ty) y x))
(rule
(lower (has_type $I128 (iadd x y)))
(let
( ;; low part.
(low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
;; compute carry: the low-half addition wrapped iff low < y_lo (unsigned).
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
;;
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
;; add carry.
(high Reg (alu_add high_tmp carry)))
(value_regs low high)))
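;; Illustrative example: if the low halves are u64::MAX and 1, `low` wraps to 0
;; and the SltU above sees 0 < 1, producing the carry of 1 that is folded into
;; the high half.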
;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (iadd_ifcout x y)))
(output_ifcout (alu_add x y)))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.
(rule (lower (has_type (fits_in_64 ty) (isub x y)))
(alu_rrr (AluOPRRR.Sub) x y))
(rule (lower (has_type (fits_in_32 ty) (isub x y)))
(alu_rrr (AluOPRRR.Subw) x y))
(rule (lower (has_type $I128 (isub x y)))
(i128_sub x y))
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
(alu_rrr (AluOPRRR.Sub) (zero_reg) x))
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (imul x y)))
(alu_rrr (AluOPRRR.Mul) x y))
(rule (lower (has_type (fits_in_32 ty) (imul x y)))
(alu_rrr (AluOPRRR.Mulw) x y))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
;; for I128
(rule (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
(x_hi Reg (value_regs_get x_regs 1))
;; Get the high/low registers for `y`.
(y_regs ValueRegs y)
(y_lo Reg (value_regs_get y_regs 0))
(y_hi Reg (value_regs_get y_regs 1))
;; 128bit mul formula:
;; dst_lo = x_lo * y_lo
;; dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
;;
;; We can convert the above formula into the following
;; umulh dst_hi, x_lo, y_lo
;; madd dst_hi, x_lo, y_hi, dst_hi
;; madd dst_hi, x_hi, y_lo, dst_hi
;; madd dst_lo, x_lo, y_lo, zero
(dst_hi1 Reg (umulh x_lo y_lo))
(dst_hi2 Reg (madd x_lo y_hi dst_hi1))
(dst_hi Reg (madd x_hi y_lo dst_hi2))
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))
;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_32 ty) (udiv x y)))
(let
((y2 Reg (ext_int_if_need $false y ty))
(_ InstOutput (gen_div_by_zero y2)))
(alu_rrr (AluOPRRR.Divuw) (ext_int_if_need $false x ty) y2)))
(rule (lower (has_type (fits_in_32 ty) (sdiv x y)))
(let
((a Reg (ext_int_if_need $true x ty))
(b Reg (ext_int_if_need $true y ty))
(_ InstOutput (gen_div_overflow a b ty))
(_ InstOutput (gen_div_by_zero b)))
(alu_rrr (AluOPRRR.Divw) a b)))
(rule (lower (has_type $I64 (sdiv x y)))
(let
((_ InstOutput (gen_div_overflow x y $I64))
(_ InstOutput (gen_div_by_zero y)) )
(alu_rrr (AluOPRRR.Div) x y)))
(rule (lower (has_type $I64 (udiv x y)))
(let
((_ InstOutput (gen_div_by_zero y)))
(alu_rrr (AluOPRRR.DivU) x y)))
;;;; Rules for `rem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_16 ty) (urem x y)))
(let
((y2 Reg(ext_int_if_need $false y ty))
(_ InstOutput (gen_div_by_zero y2)))
(alu_rrr (AluOPRRR.Remuw) (ext_int_if_need $false x ty) y2)))
(rule (lower (has_type (fits_in_16 ty) (srem x y)))
(let
((y2 Reg (ext_int_if_need $true y ty))
(_ InstOutput (gen_div_by_zero y2)))
(alu_rrr (AluOPRRR.Remw) (ext_int_if_need $true x ty) y2)))
(rule (lower (has_type $I32 (srem x y)))
(let
((y2 Reg (ext_int_if_need $true y $I32))
(_ InstOutput (gen_div_by_zero y2)))
(alu_rrr (AluOPRRR.Remw) x y2)))
(rule (lower (has_type $I32 (urem x y)))
(let
((y2 Reg (ext_int_if_need $false y $I32))
(_ InstOutput (gen_div_by_zero y2)))
(alu_rrr (AluOPRRR.Remuw) x y2)))
(rule (lower (has_type $I64 (srem x y)))
(let
((_ InstOutput (gen_div_by_zero y)))
(alu_rrr (AluOPRRR.Rem) x y)))
(rule (lower (has_type $I64 (urem x y)))
(let
((_ InstOutput (gen_div_by_zero y)))
(alu_rrr (AluOPRRR.RemU) x y)))
;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (band x y)))
(alu_rrr (AluOPRRR.And) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (band x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Andi) x y))
(rule (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Andi) y x))
(rule (lower (has_type $B128 (band x y)))
(lower_b128_binary (AluOPRRR.And) x y))
(rule (lower (has_type $I128 (band x y)))
(lower_b128_binary (AluOPRRR.And) x y))
(rule (lower (has_type $F32 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F32))
(rule (lower (has_type $F64 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F64))
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (bor x y)))
(alu_rrr (AluOPRRR.Or) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (bor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Ori) x y))
(rule (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Ori) y x))
(rule (lower (has_type $B128 (bor x y)))
(lower_b128_binary (AluOPRRR.Or) x y))
(rule (lower (has_type $I128 (bor x y)))
(lower_b128_binary (AluOPRRR.Or) x y))
(rule (lower (has_type $F32 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F32))
(rule (lower (has_type $F64 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F64))
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (bxor x y)))
(alu_rrr (AluOPRRR.Xor) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule (lower (has_type (fits_in_64 ty) (bxor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Xori) x y))
(rule (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Xori) y x))
(rule (lower (has_type $B128 (bxor x y)))
(lower_b128_binary (AluOPRRR.Xor) x y))
(rule (lower (has_type $I128 (bxor x y)))
(lower_b128_binary (AluOPRRR.Xor) x y))
(rule (lower (has_type $F32 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F32))
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (bnot x)))
(alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_neg_bits -1)))
(rule (lower (has_type $I128 (bnot x)))
(bnot_128 x))
(rule (lower (has_type $B128 (bnot x)))
(bnot_128 x))
(rule
(lower (has_type $F32 (bnot x)))
(lower_float_bnot x $F32)
)
(rule
(lower (has_type $F64 (bnot x)))
(lower_float_bnot x $F64)
)
;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (bitrev x)))
(lower_bit_reverse x ty))
(rule (lower (has_type $I128 (bitrev x)))
(let ((val ValueRegs x)
(lo_rev Reg (lower_bit_reverse (value_regs_get val 0) $I64))
(hi_rev Reg (lower_bit_reverse (value_regs_get val 1) $I64)))
(value_regs hi_rev lo_rev)))
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (ctz x)))
(lower_ctz ty x))
(rule (lower (has_type $I128 (ctz x)))
(lower_ctz_128 x))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (clz x)))
(lower_clz ty x))
(rule (lower (has_type $I128 (clz x)))
(lower_clz_i128 x))
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type out (uextend x @ (value_type in))))
(lower_extend x $false (ty_bits in) (ty_bits out)))
;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type out (sextend x @ (value_type in))))
(lower_extend x $true (ty_bits in) (ty_bits out)))
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (band_not x y)))
(gen_andn x y))
(rule (lower (has_type $I128 (band_not x y)))
(let
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (popcnt x)))
(lower_popcnt x ty))
(rule (lower (has_type $I128 (popcnt x)))
(lower_popcnt_i128 x))
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (ishl x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sllw) x (alu_andi y 7))
)
(rule (lower (has_type $I8(ishl x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Slliw) x (imm12_and y 7)))
(rule (lower (has_type $I16 (ishl x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sllw) x (alu_andi y 15))
)
(rule (lower (has_type $I16(ishl x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Slliw) x (imm12_and y 15)))
(rule (lower (has_type $I32(ishl x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sllw) x y))
(rule (lower (has_type $I32 (ishl x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Slliw) x y))
(rule (lower (has_type $I64 (ishl x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Slli) x y))
(rule (lower (has_type $I64(ishl x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sll) x y))
(rule (lower (has_type $I128 (ishl x y)))
(lower_i128_ishl x y))
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (ushr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Srlw) (ext_int_if_need $false x $I8) (alu_andi y 7))
)
(rule (lower (has_type $I8(ushr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.SrliW) (ext_int_if_need $false x $I8) (imm12_and y 7)))
(rule (lower (has_type $I16 (ushr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Srlw) (ext_int_if_need $false x $I16) (alu_andi y 15))
)
(rule (lower (has_type $I16(ushr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.SrliW) (ext_int_if_need $false x $I16) (imm12_and y 15)))
(rule (lower (has_type $I32(ushr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Srlw) x y))
(rule (lower (has_type $I32 (ushr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.SrliW) x y))
(rule (lower (has_type $I64 (ushr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Srli) x y))
(rule (lower (has_type $I64(ushr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Srl) x y))
(rule (lower (has_type $I128 (ushr x y)))
(lower_i128_ushr x y))
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (sshr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sra) (ext_int_if_need $true x $I8) (alu_andi y 7))
)
(rule (lower (has_type $I8(sshr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Srai) (ext_int_if_need $true x $I8) (imm12_and y 7)))
(rule (lower (has_type $I16 (sshr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sra) (ext_int_if_need $true x $I16) (alu_andi y 15))
)
(rule (lower (has_type $I16(sshr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Srai) (ext_int_if_need $true x $I16) (imm12_and y 15)))
(rule (lower (has_type $I32 (sshr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sraw) x y))
(rule (lower (has_type $I32 (sshr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Sraiw) x y))
(rule (lower (has_type $I64 (sshr x (valueregs_2_reg y))))
(alu_rrr (AluOPRRR.Sra) x y))
(rule (lower (has_type $I64(sshr x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Srai) x y))
(rule (lower (has_type $I128 (sshr x y)))
(lower_i128_sshr x y))
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty)(rotl x (valueregs_2_reg y))))
(lower_rotl ty (ext_int_if_need $false x ty) y))
(rule (lower (has_type $I128 (rotl x y)))
(lower_i128_rotl x y))
;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty)(rotr x (valueregs_2_reg y))))
(lower_rotr ty (ext_int_if_need $false x ty) y))
(rule (lower (has_type $I128 (rotr x y)))
(lower_i128_rotr x y))
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Note the operand order: `bxor_not x y` computes `x ^ ~y`, so only the second operand is inverted.
(rule (lower (has_type (fits_in_64 ty)(bxor_not x y)))
(gen_xor_not x y))
(rule (lower (has_type $I128 (bxor_not x y)))
(let
((low Reg (gen_xor_not (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_xor_not (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)
)
)
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty)(bor_not x y)))
(gen_orn x y))
(rule (lower (has_type $I128 (bor_not x y)))
(let
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty)(cls x)))
(lower_cls x ty))
(rule (lower (has_type $I128 (cls x)))
(lower_cls_i128 x))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule
(lower (has_type ty (fabs x)))
(gen_fabs x ty))
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule
(lower (has_type ty (fneg x)))
(fpu_rrr (f_copy_neg_sign_op ty) ty x x))
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (fcopysign x y)))
(fpu_rrr (f_copysign_op ty) ty x y))
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fma x y z)))
(fpu_rrrr (FpuOPRRRR.FmaddS) $F64 x y z))
(rule (lower (has_type $F64 (fma x y z)))
(fpu_rrrr (FpuOPRRRR.FmaddD) $F64 x y z))
;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (sqrt x)))
(fpu_rr (FpuOPRR.FsqrtS)$F64 x))
(rule (lower (has_type $F64 (sqrt x)))
(fpu_rr (FpuOPRR.FsqrtD)$F64 x))
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule
;;
(lower
(has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x)))
(gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo)))
;;; for I8 and I16
(rule
(lower
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x)))
(gen_atomic_rmw_loop op ty addr x))
;;; Special cases for I8/I16 max/min etc., because the value must first be
;;; sign- or zero-extended.
(rule
(lower
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x)))
(gen_atomic_rmw_loop op ty addr (ext_int_if_need $true x ty)))
(rule
;;
(lower
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x)))
;;
(gen_atomic_rmw_loop op ty addr (ext_int_if_need $false x ty)))
;;;;; Rules for `AtomicRmwOp.Sub`
(rule
(lower
(has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x)))
(let
((tmp WritableReg (temp_writable_reg ty))
(x2 Reg (alu_rrr (AluOPRRR.Sub) (zero_reg) x)))
(gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo))))
(decl gen_atomic_rmw_loop (AtomicRmwOp Type Reg Reg) Reg)
(rule
(gen_atomic_rmw_loop op ty addr x)
(let
((dst WritableReg (temp_writable_reg $I64))
(t0 WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0))))
(writable_reg_to_reg dst)))
;;;;; Rules for `AtomicRmwOp.Nand`
(rule
(lower
(has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x)))
(gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x))
(decl is_atomic_rmw_max_etc (AtomicRmwOp bool)AtomicRmwOp)
(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc)
;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;;
(rule
(lower (has_type (valid_atomic_transaction ty) (atomic_load flags p)))
(gen_atomic_load p ty))
;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;;
(rule
(lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p))
(gen_atomic_store p ty src))
(decl gen_atomic_offset (Reg Type) Reg)
(rule (gen_atomic_offset p (fits_in_16 ty))
(alu_slli (alu_andi p 3) 3))
(rule (gen_atomic_offset p _)
(zero_reg))
(decl gen_atomic_p (Reg Type) Reg)
(rule (gen_atomic_p p (fits_in_16 ty))
(alu_andi p -4))
(rule (gen_atomic_p p _)
p)
;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;;
(rule
(lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x)))
(let
((t0 WritableReg (temp_writable_reg ty))
(dst WritableReg (temp_writable_reg ty))
(_ Unit(emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (ext_int_if_need $false e ty) (gen_atomic_p p ty) x ty))))
(writable_reg_to_reg dst)))
;;;;; Rules for `copy`;;;;;;;;;;;;;;;;;
(rule (lower(has_type ty (copy x)))
(gen_move2 x ty ty))
;;;;; Rules for `breduce`;;;;;;;;;;;;;;;;;
(rule
(lower (has_type ty (breduce x)))
(gen_move2 (value_regs_get x 0) ty ty))
;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;;
(rule
(lower (has_type ty (ireduce x)))
(gen_move2 (value_regs_get x 0) ty ty))
;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;;
(rule
(lower (has_type ty (fpromote x)))
(fpu_rr (FpuOPRR.FcvtDS) ty x))
(rule
(lower (has_type ty (fdemote x)))
(fpu_rr (FpuOPRR.FcvtSD) ty x))
;;;;; Rules for float arithmetic ;;;;;;;;;
(rule
(lower (has_type ty (fadd x y)))
(fpu_rrr (f_arithmatic_op ty (Opcode.Fadd)) ty x y))
(rule
(lower (has_type ty (fsub x y)))
(fpu_rrr (f_arithmatic_op ty (Opcode.Fsub)) ty x y))
(rule
(lower (has_type ty (fmul x y)))
(fpu_rrr (f_arithmatic_op ty (Opcode.Fmul)) ty x y))
(rule
(lower (has_type ty (fdiv x y)))
(fpu_rrr (f_arithmatic_op ty (Opcode.Fdiv)) ty x y))
(rule
(lower (has_type ty (fmin x y)))
(gen_float_select (FloatSelectOP.Min) x y ty))
(rule
(lower (has_type ty (fmin_pseudo x y)))
(gen_float_select_pseudo (FloatSelectOP.Min) x y ty))
(rule
(lower (has_type ty (fmax x y)))
(gen_float_select (FloatSelectOP.Max) x y ty))
(rule
(lower (has_type ty (fmax_pseudo x y)))
(gen_float_select_pseudo (FloatSelectOP.Max) x y ty))
;;;;; Rules for `stack_addr`;;;;;;;;;
(rule
(lower (stack_addr ss offset))
(gen_stack_addr ss offset))
;;;;; Rules for `is_null`;;;;;;;;;
(rule
(lower (is_null v))
(gen_reference_check (ReferenceCheckOP.IsNull) v))
;;;;; Rules for `is_invalid`;;;;;;;;;
(rule
(lower (is_invalid v))
(gen_reference_check (ReferenceCheckOP.IsInvalid) v))
;;;;; Rules for `select`;;;;;;;;;
(rule
(lower (has_type ty (select c x y)))
(gen_select ty c x y)
)
;;;;; Rules for `bitselect`;;;;;;;;;
(rule
(lower (has_type ty (bitselect c x y)))
(gen_bitselect ty c x y))
;;;;; Rules for `bint`;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (bint (valueregs_2_reg x))))
(gen_bint x))
(rule
(lower (has_type $I128 (bint (valueregs_2_reg x))))
(let ((tmp Reg (gen_bint x)))
(value_regs tmp (zero_reg)))
)
;;;;; Rules for `isplit`;;;;;;;;;
(rule
(lower (isplit x))
(let
((t1 Reg (gen_move2 (value_regs_get x 0) $I64 $I64))
(t2 Reg (gen_move2 (value_regs_get x 1) $I64 $I64)))
(output_pair t1 t2)))
;;;;; Rules for `iconcat`;;;;;;;;;
(rule
(lower (has_type $I128 (iconcat x y)))
(let
((t1 Reg (gen_move2 x $I64 $I64))
(t2 Reg (gen_move2 y $I64 $I64)))
(value_regs t1 t2)))
;;;;; Rules for `imax`;;;;;;;;;
(rule
(lower (has_type ty (imax x y)))
(gen_int_select ty (IntSelectOP.Imax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
;;;;; Rules for `imin`;;;;;;;;;
(rule
(lower (has_type ty (imin x y)))
(gen_int_select ty (IntSelectOP.Imin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
;;;;; Rules for `umax`;;;;;;;;;
(rule
(lower (has_type ty (umax x y)))
(gen_int_select ty (IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
;;;;; Rules for `umin`;;;;;;;;;
(rule
(lower (has_type ty (umin x y)))
(gen_int_select ty (IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
;;;;; Rules for `debugtrap`;;;;;;;;;
(rule
(lower (debugtrap))
(side_effect (SideEffectNoResult.Inst (MInst.EBreak))))
;;;;; Rules for `fence`;;;;;;;;;
(rule
(lower (fence))
(side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15))))
;;;;; Rules for `trap`;;;;;;;;;
(rule
(lower (trap code))
(udf code))
;;;;; Rules for `resumable_trap`;;;;;;;;;
(rule
(lower (resumable_trap code))
(udf code))
;;;;; Rules for `uload8`;;;;;;;;;
(rule
(lower (uload8 flags p offset))
(gen_load p offset (int_load_op $false 8) flags $I64))
;;;;; Rules for `sload8`;;;;;;;;;
(rule
(lower (sload8 flags p offset))
(gen_load p offset (int_load_op $true 8) flags $I64))
;;;;; Rules for `uload16`;;;;;;;;;
(rule
(lower (uload16 flags p offset))
(gen_load p offset (int_load_op $false 16) flags $I64))
;;;;; Rules for `sload16`;;;;;;;;;
(rule
(lower (sload16 flags p offset))
(gen_load p offset (int_load_op $true 16) flags $I64))
;;;;; Rules for `uload32`;;;;;;;;;
(rule
(lower (uload32 flags p offset))
(gen_load p offset (int_load_op $false 32) flags $I64))
;;;;; Rules for `sload32`;;;;;;;;;
(rule
(lower (sload32 flags p offset))
(gen_load p offset (int_load_op $true 32) flags $I64))
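;;;;; Rules for `load`;;;;;;;;;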
(rule
(lower (has_type ty (load flags p offset)))
(gen_load p offset (load_op ty) flags ty)
)
;;;; for I128
(rule
(lower (has_type $I128 (load flags p offset)))
(gen_load_128 p offset flags))
;;;; for B128
(rule
(lower (has_type $B128 (load flags p offset)))
(gen_load_128 p offset flags))
;;;;; Rules for `istore8`;;;;;;;;;
(rule
(lower (istore8 flags x p offset))
(gen_store p offset (StoreOP.Sb) flags x))
;;;;; Rules for `istore16`;;;;;;;;;
(rule
(lower (istore16 flags x p offset))
(gen_store p offset (StoreOP.Sh) flags x))
;;;;; Rules for `istore32`;;;;;;;;;
(rule
(lower (istore32 flags x p offset))
(gen_store p offset (StoreOP.Sw) flags x))
;;;;; Rules for `store`;;;;;;;;;
(rule
(lower (store flags x @(value_type ty) p offset))
(gen_store p offset (store_op ty) flags x))
;;; special for I128
(rule
(lower (store flags x @ (value_type $I128 ) p offset))
(gen_store_128 p offset flags x))
;;; special for B128
(rule
(lower (store flags x @ (value_type $B128 ) p offset))
(gen_store_128 p offset flags x))
(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
(rule
(gen_icmp cc x y ty)
(let
((result WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Icmp cc result x y ty))))
result))
;;;;; Rules for `icmp`;;;;;;;;;
(rule
(lower (icmp cc x @ (value_type ty) y))
(lower_icmp cc x y ty))
;; special case: `icmp` consuming the first (value) result of `iadd_ifcout`.
(rule
(lower (icmp cc (iadd_ifcout a @ (value_type ty) b) y))
(lower_icmp cc (alu_add a b) y ty))
(rule
(lower (icmp cc x (iadd_ifcout a @ (value_type ty) b)))
(lower_icmp cc x (alu_add a b) ty))
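;; (the add is simply re-computed here and its value compared directly)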
(decl gen_fcmp (FloatCC Value Value Type) Reg)
(rule
(gen_fcmp cc x y ty)
(let
((result WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Fcmp cc result x y ty))))
(writable_reg_to_reg result)))
;;;;; Rules for `fcmp`;;;;;;;;;
(rule
(lower (fcmp cc x @ (value_type ty) y))
(gen_fcmp cc x y ty))
;;;;; Rules for `func_addr`;;;;;;;;;
(rule
(lower (func_addr (func_ref_data _ name _)))
(load_ext_name name 0))
;;;;; Rules for `fcvt_to_uint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_uint v @(value_type from))))
(gen_fcvt_int $false v $false from to))
;;;;; Rules for `fcvt_to_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint v @ (value_type from))))
(gen_fcvt_int $false v $true from to))
;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint_sat v @ (value_type from))))
(gen_fcvt_int $true v $true from to))
;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_uint_sat v @ (value_type from))))
(gen_fcvt_int $true v $false from to))
;;;;; Rules for `fcvt_from_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_sint v @ (value_type from))))
(fpu_rr (int_convert_2_float_op from $true to) to v))
;;;;; Rules for `fcvt_from_uint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_uint v @ (value_type from))))
(fpu_rr (int_convert_2_float_op from $false to) to v))
;;;;; Rules for `symbol_value`;;;;;;;;;
(rule
(lower (symbol_value (symbol_value_data name _ offset)))
(load_ext_name name offset)
)
;;;;; Rules for `bitcast`;;;;;;;;;
(rule
(lower (has_type out (bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))
;;;;; Rules for `raw_bitcast`;;;;;;;;;
(rule
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))
;;;;; Rules for `ceil`;;;;;;;;;
(rule
(lower (has_type ty (ceil x)))
(gen_float_round (FloatRoundOP.Ceil) x ty)
)
;;;;; Rules for `floor`;;;;;;;;;
(rule
(lower (has_type ty (floor x)))
(gen_float_round (FloatRoundOP.Floor) x ty))
;;;;; Rules for `trunc`;;;;;;;;;
(rule
(lower (has_type ty (trunc x)))
(gen_float_round (FloatRoundOP.Trunc) x ty))
;;;;; Rules for `nearest`;;;;;;;;;
(rule
(lower (has_type ty (nearest x)))
(gen_float_round (FloatRoundOP.Nearest) x ty))
;;;;; Rules for `selectif`;;;;;;;;;
(rule
(lower (has_type r_ty (selectif cc (ifcmp ca @ (value_type cty) cb) a b)))
(let
((dst VecWritableReg (alloc_vec_writable r_ty))
(r Reg (lower_icmp cc ca cb cty))
(_ Unit (emit (MInst.SelectIf $false (vec_writable_clone dst) r a b))))
(vec_writable_to_regs dst)))
;;;;; Rules for `selectif_spectre_guard`;;;;;;;;;
(rule
(lower (has_type r_ty (selectif_spectre_guard cc (ifcmp ca @ (value_type cty) cb) a b)))
(let
((dst VecWritableReg (alloc_vec_writable r_ty))
(r Reg (lower_icmp cc ca cb cty))
(_ Unit (emit (MInst.SelectIf $true (vec_writable_clone dst) r a b))))
(vec_writable_to_regs dst)))
;;;;; Rules for `trueif`;;;;;;;;;
(rule
(lower (has_type ty (trueif cc (ifcmp ca @ (value_type cty) cb))))
(lower_icmp cc ca cb cty))
;;;;; Rules for `trueff`;;;;;;;;;
(rule
(lower (has_type ty (trueff cc (ffcmp ca @ (value_type cty) cb))))
(gen_fcmp cc ca cb cty))
;;;;; Rules for `trapif`;;;;;;;;;
(rule
(lower (trapif cc (ifcmp a @ (value_type ty) b) trap_code))
(let
((test Reg (lower_icmp cc a b ty)))
(gen_trapif test trap_code)))
(rule
(lower (trapif _ (iadd_ifcout a @ (value_type ty) b) trap_code))
(let
((test Reg (lower_uadd_overflow a b ty)))
(gen_trapif test trap_code)))
;;;;; Rules for `trapff`;;;;;;;;;
(rule
(lower (trapff cc (ffcmp a @(value_type ty) b) trap_code))
(gen_trapff cc a b ty trap_code))
;;;;; Rules for `bmask`;;;;;;;;;
(rule
;; booleans are encoded as all ones, so a plain move suffices.
(lower (has_type (fits_in_64 ty) (bmask x @ (value_type ity))))
(gen_move2 (value_regs_get x 0) ity ty))
;;; for i128
(rule
;; booleans are encoded as all ones, so a plain move suffices.
(lower (has_type $I128 (bmask x @ (value_type ity))))
(value_regs (gen_move2 (value_regs_get x 0) $I64 $I64) (gen_move2 (value_regs_get x 0) $I64 $I64)))
;;;;; Rules for `bextend`;;;;;;;;;
(rule
;; booleans are encoded as all ones, so a plain move suffices.
(lower (has_type ty (bextend x @ (value_type ity))))
;; extra check that this is a valid bool extension.
(if-let _ (valid_bextend_ty ity ty))
(gen_moves x ity ty))
;;; for B128
(rule
;; booleans are encoded as all ones, so a plain move suffices.
(lower (has_type ty (bextend x @ (value_type ity))))
;; extra check that this is a valid bool extension.
(if-let $B128 (valid_bextend_ty ity ty))
(value_regs (gen_moves x $I64 $I64) (gen_moves x $I64 $I64)))
;; N.B.: the Ret itself is generated by the ABI.
(rule (lower (return args))
(lower_return (range 0 (value_slice_len args)) args))
;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;;
(rule (lower (get_frame_pointer))
(gen_move2 (x_reg 8) $I64 $I64))
(rule (lower (get_stack_pointer))
(gen_move2 (x_reg 2) $I64 $I64))
(rule (lower (get_return_address))
(load_ra))
;;; Rules for `iabs` ;;;;;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (iabs x)))
(lower_iabs x ty))
;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (call (func_ref_data sig_ref extname dist) inputs))
(gen_call sig_ref extname dist inputs))
(rule (lower (call_indirect sig_ref val inputs))
(gen_call_indirect sig_ref val inputs))

View File

@@ -0,0 +1,62 @@
//! Lowering rules for Riscv64.
use super::lower_inst;
use crate::ir::Inst as IRInst;
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::Riscv64Backend;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
pub mod isle;
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for Riscv64Backend {
type MInst = Inst;
fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
}
fn lower_branch_group(
&self,
ctx: &mut Lower<Inst>,
branches: &[IRInst],
targets: &[MachLabel],
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
// it may be an unconditional branch, a fallthrough, a return, or a
// trap. These conditions are verified by `is_ebb_basic()` during the
// verifier pass.
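// For example, a block ending in `brnz v0, block2` followed by
// `jump block3` reaches here as a two-element `branches` slice.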
assert!(branches.len() <= 2);
if branches.len() == 2 {
let op1 = ctx.data(branches[1]).opcode();
assert!(op1 == Opcode::Jump);
}
// Lower the first branch in ISLE. This will automatically handle
// the second branch (if any) by emitting a two-way conditional branch.
if let Ok(()) = super::lower::isle::lower_branch(
ctx,
&self.triple,
&self.flags,
&self.isa_flags,
branches[0],
targets,
) {
return Ok(());
}
unreachable!(
"implemented in ISLE: branch = `{}`",
ctx.dfg().display_inst(branches[0]),
);
}
fn maybe_pinned_reg(&self) -> Option<Reg> {
// The pinned register is a fixed register exempt from register allocation.
// The riscv64 backend does not support this feature yet.
None
}
}

View File

@@ -0,0 +1,544 @@
//! ISLE integration glue code for riscv64 lowering.
// Pull in the ISLE generated code.
#[allow(unused)]
pub mod generated_code;
use generated_code::{Context, MInst};
use target_lexicon::Triple;
// Types that the generated ISLE code uses via `use super::*`.
use super::{writable_zero_reg, zero_reg};
use std::vec::Vec;
use crate::isa::riscv64::settings::Flags as IsaFlags;
use crate::machinst::{isle::*, MachInst, SmallInstVec};
use crate::settings::Flags;
use crate::machinst::{VCodeConstant, VCodeConstantData};
use crate::{
ir::{
immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
StackSlot, TrapCode, Value, ValueList,
},
isa::riscv64::inst::*,
machinst::{ArgPair, InsnOutput, Lower},
};
use regalloc2::PReg;
use crate::isa::riscv64::abi::Riscv64ABICaller;
use std::boxed::Box;
use std::convert::TryFrom;
use crate::machinst::Reg;
type BoxCallInfo = Box<CallInfo>;
type BoxCallIndInfo = Box<CallIndInfo>;
type BoxExternalName = Box<ExternalName>;
type VecMachLabel = Vec<MachLabel>;
type VecArgPair = Vec<ArgPair>;
use crate::machinst::valueregs;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
lower_ctx: &mut Lower<MInst>,
flags: &Flags,
triple: &Triple,
isa_flags: &IsaFlags,
outputs: &[InsnOutput],
inst: Inst,
) -> Result<(), ()> {
lower_common(
lower_ctx,
triple,
flags,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
)
}
impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_method_helpers!(Riscv64ABICaller);
}
impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_methods!();
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs {
match val.len() {
1 => ValueRegs::one(val[0].to_reg()),
2 => ValueRegs::two(val[0].to_reg(), val[1].to_reg()),
_ => unreachable!(),
}
}
fn valid_bextend_ty(&mut self, from: Type, to: Type) -> Option<Type> {
if from.is_bool() && to.is_bool() && from.bits() < to.bits() {
Some(to)
} else {
None
}
}
fn lower_br_fcmp(
&mut self,
cc: &FloatCC,
a: Reg,
b: Reg,
targets: &VecMachLabel,
ty: Type,
) -> InstOutput {
let tmp = self.temp_writable_reg(I64);
MInst::lower_br_fcmp(
*cc,
a,
b,
BranchTarget::Label(targets[0]),
BranchTarget::Label(targets[1]),
ty,
tmp,
)
.iter()
.for_each(|i| self.emit(i));
InstOutput::default()
}
fn lower_brz_or_nz(
&mut self,
cc: &IntCC,
a: ValueRegs,
targets: &VecMachLabel,
ty: Type,
) -> InstOutput {
MInst::lower_br_icmp(
*cc,
a,
self.int_zero_reg(ty),
BranchTarget::Label(targets[0]),
BranchTarget::Label(targets[1]),
ty,
)
.iter()
.for_each(|i| self.emit(i));
InstOutput::default()
}
fn lower_br_icmp(
&mut self,
cc: &IntCC,
a: ValueRegs,
b: ValueRegs,
targets: &VecMachLabel,
ty: Type,
) -> InstOutput {
let test = generated_code::constructor_lower_icmp(self, cc, a, b, ty).unwrap();
self.emit(&MInst::CondBr {
taken: BranchTarget::Label(targets[0]),
not_taken: BranchTarget::Label(targets[1]),
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: test,
rs2: zero_reg(),
},
});
InstOutput::default()
}
fn load_ra(&mut self) -> Reg {
if self.flags.preserve_frame_pointers() {
let tmp = self.temp_writable_reg(I64);
self.emit(&MInst::Load {
rd: tmp,
op: LoadOP::Ld,
flags: MemFlags::trusted(),
from: AMode::FPOffset(8, I64),
});
tmp.to_reg()
} else {
self.gen_move2(link_reg(), I64, I64)
}
}
fn int_zero_reg(&mut self, ty: Type) -> ValueRegs {
assert!(ty.is_int() || ty.is_bool(), "{:?}", ty);
if ty.bits() == 128 {
ValueRegs::two(self.zero_reg(), self.zero_reg())
} else {
ValueRegs::one(self.zero_reg())
}
}
fn vec_label_get(&mut self, val: &VecMachLabel, x: u8) -> MachLabel {
val[x as usize]
}
fn label_to_br_target(&mut self, label: MachLabel) -> BranchTarget {
BranchTarget::Label(label)
}
fn vec_writable_clone(&mut self, v: &VecWritableReg) -> VecWritableReg {
v.clone()
}
fn gen_moves(&mut self, rs: ValueRegs, in_ty: Type, out_ty: Type) -> ValueRegs {
let tmp = construct_dest(|ty| self.temp_writable_reg(ty), out_ty);
if in_ty.bits() < 64 {
self.emit(&gen_move(tmp.regs()[0], out_ty, rs.regs()[0], in_ty));
} else {
gen_moves(tmp.regs(), rs.regs())
.iter()
.for_each(|i| self.emit(i));
}
tmp.map(|r| r.to_reg())
}
fn imm12_and(&mut self, imm: Imm12, x: i32) -> Imm12 {
Imm12::from_bits(imm.as_i16() & (x as i16))
}
fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg {
if ty.is_int() || ty.is_bool() || ty == R32 || ty == R64 {
if ty.bits() <= 64 {
vec![self.temp_writable_reg(I64)]
} else {
vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)]
}
} else if ty.is_float() {
vec![self.temp_writable_reg(ty)]
} else {
unimplemented!("ty:{:?}", ty)
}
}
fn imm(&mut self, ty: Type, mut val: u64) -> Reg {
// Boolean types (b1, b8, b16, b32, b64).
// `b1` is an abstract boolean: it exists only as an SSA value and cannot be
// stored in memory directly (it can be converted to an integer 0 or 1 with
// `bint`). The wider boolean types can be stored in memory and are
// represented as either all-zero bits or all-one bits, so any non-zero
// boolean constant must be materialized as all ones.
if ty.is_bool() && val != 0 {
// need all be one
val = !0;
}
let tmp = self.temp_writable_reg(ty);
self.emit_list(&MInst::load_constant_u64(tmp, val));
tmp.to_reg()
}
#[inline]
fn emit(&mut self, arg0: &MInst) -> Unit {
self.lower_ctx.emit(arg0.clone());
}
#[inline]
fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12> {
Imm12::maybe_from_u64(arg0)
}
#[inline]
fn writable_zero_reg(&mut self) -> WritableReg {
writable_zero_reg()
}
#[inline]
fn neg_imm12(&mut self, arg0: Imm12) -> Imm12 {
-arg0
}
#[inline]
fn zero_reg(&mut self) -> Reg {
zero_reg()
}
#[inline]
fn imm_from_bits(&mut self, val: u64) -> Imm12 {
Imm12::maybe_from_u64(val).unwrap()
}
#[inline]
fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 {
Imm12::maybe_from_u64(val as u64).unwrap()
}
fn gen_default_frm(&mut self) -> OptionFloatRoundingMode {
None
}
fn gen_select_reg(&mut self, cc: &IntCC, a: Reg, b: Reg, rs1: Reg, rs2: Reg) -> Reg {
let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class()));
self.emit(&MInst::SelectReg {
rd,
rs1,
rs2,
condition: IntegerCompare {
kind: *cc,
rs1: a,
rs2: b,
},
});
rd.to_reg()
}
fn load_u64_constant(&mut self, val: u64) -> Reg {
let rd = self.temp_writable_reg(I64);
MInst::load_constant_u64(rd, val)
.iter()
.for_each(|i| self.emit(i));
rd.to_reg()
}
fn u8_as_i32(&mut self, x: u8) -> i32 {
x as i32
}
fn ext_sign_bit(&mut self, ty: Type, r: Reg) -> Reg {
assert!(ty.is_int());
let rd = self.temp_writable_reg(I64);
self.emit(&MInst::AluRRImm12 {
alu_op: AluOPRRI::Bexti,
rd,
rs: r,
imm12: Imm12::from_bits((ty.bits() - 1) as i16),
});
rd.to_reg()
}
fn imm12_const(&mut self, val: i32) -> Imm12 {
Imm12::maybe_from_u64(val as u64).unwrap()
}
fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 {
Imm12::maybe_from_u64((val + add) as u64).unwrap()
}
//
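// Computes a pair of registers for shift/rotate lowering: the first holds the
// shift amount masked to the type width (`shamt & (ty_bits - 1)`), the second
// holds `ty_bits` minus that masked amount.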
fn gen_shamt(&mut self, ty: Type, shamt: Reg) -> ValueRegs {
let shamt = {
let tmp = self.temp_writable_reg(I64);
self.emit(&MInst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd: tmp,
rs: shamt,
imm12: Imm12::from_bits((ty.bits() - 1) as i16),
});
tmp.to_reg()
};
let len_sub_shamt = {
let len_sub_shamt = self.temp_writable_reg(I64);
self.emit(&MInst::load_imm12(
len_sub_shamt,
Imm12::from_bits(ty.bits() as i16),
));
self.emit(&MInst::AluRRR {
alu_op: AluOPRRR::Sub,
rd: len_sub_shamt,
rs1: len_sub_shamt.to_reg(),
rs2: shamt,
});
len_sub_shamt.to_reg()
};
ValueRegs::two(shamt, len_sub_shamt)
}
fn has_b(&mut self) -> Option<bool> {
Some(self.isa_flags.has_b())
}
fn has_zbkb(&mut self) -> Option<bool> {
Some(self.isa_flags.has_zbkb())
}
fn valueregs_2_reg(&mut self, val: Value) -> Reg {
self.put_in_regs(val).regs()[0]
}
fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
x[index as usize]
}
fn move_f_to_x(&mut self, r: Reg, ty: Type) -> Reg {
let result = self.temp_writable_reg(I64);
self.emit(&gen_move(result, I64, r, ty));
result.to_reg()
}
fn offset32_imm(&mut self, offset: i32) -> Offset32 {
Offset32::new(offset)
}
fn default_memflags(&mut self) -> MemFlags {
MemFlags::new()
}
fn move_x_to_f(&mut self, r: Reg, ty: Type) -> Reg {
let result = self.temp_writable_reg(ty);
self.emit(&gen_move(result, ty, r, I64));
result.to_reg()
}
fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode {
Some(*f)
}
fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR {
FpuOPRR::int_convert_2_float_op(from, is_signed, to)
}
fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode {
AMode::RegOffset(base, i64::from(offset), ty)
}
fn valid_atomic_transaction(&mut self, ty: Type) -> Option<Type> {
if ty.is_int() && ty.bits() <= 64 {
Some(ty)
} else {
None
}
}
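// Matches the min/max atomic RMW ops, returning the op together with a flag that
// is true for the signed variants (`Smin`/`Smax`) and false for the unsigned ones
// (`Umin`/`Umax`); all other ops fail to match.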
fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> {
let op = *op;
match op {
crate::ir::AtomicRmwOp::Umin => Some((op, false)),
crate::ir::AtomicRmwOp::Umax => Some((op, false)),
crate::ir::AtomicRmwOp::Smin => Some((op, true)),
crate::ir::AtomicRmwOp::Smax => Some((op, true)),
_ => None,
}
}
fn load_op(&mut self, ty: Type) -> LoadOP {
LoadOP::from_type(ty)
}
fn store_op(&mut self, ty: Type) -> StoreOP {
StoreOP::from_type(ty)
}
fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg {
let tmp = self.temp_writable_reg(I64);
self.emit(&MInst::LoadExtName {
rd: tmp,
name: Box::new(name),
offset,
});
tmp.to_reg()
}
fn offset32_add(&mut self, a: Offset32, addend: i64) -> Offset32 {
a.try_add_i64(addend).expect("offset exceeds range")
}
fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg {
let result = self.temp_writable_reg(I64);
let i = self
.lower_ctx
.abi()
.sized_stackslot_addr(slot, i64::from(offset) as u32, result);
self.emit(&i);
result.to_reg()
}
fn atomic_amo(&mut self) -> AMO {
AMO::SeqCst
}
fn gen_move2(&mut self, r: Reg, ity: Type, oty: Type) -> Reg {
let tmp = self.temp_writable_reg(oty);
self.emit(&gen_move(tmp, oty, r, ity));
tmp.to_reg()
}
fn intcc_is_gt_etc(&mut self, cc: &IntCC) -> Option<(IntCC, bool)> {
let cc = *cc;
match cc {
IntCC::SignedLessThan => Some((cc, true)),
IntCC::SignedGreaterThanOrEqual => Some((cc, true)),
IntCC::SignedGreaterThan => Some((cc, true)),
IntCC::SignedLessThanOrEqual => Some((cc, true)),
//
IntCC::UnsignedLessThan => Some((cc, false)),
IntCC::UnsignedGreaterThanOrEqual => Some((cc, false)),
IntCC::UnsignedGreaterThan => Some((cc, false)),
IntCC::UnsignedLessThanOrEqual => Some((cc, false)),
_ => None,
}
}
fn intcc_is_eq_or_ne(&mut self, cc: &IntCC) -> Option<IntCC> {
let cc = *cc;
if cc == IntCC::Equal || cc == IntCC::NotEqual {
Some(cc)
} else {
None
}
}
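// Lowers `br_table`: `targets[0]` is the default label and the remaining labels
// form the jump table; `BrTableCheck` carries the table length and default target
// to guard the indexed `BrTable` branch.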
fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput {
let tmp = self.temp_writable_reg(I64);
let default_ = BranchTarget::Label(targets[0]);
let targets: Vec<BranchTarget> = targets
.iter()
.skip(1)
.map(|bix| BranchTarget::Label(*bix))
.collect();
self.emit(&MInst::BrTableCheck {
index,
targets_len: targets.len() as i32,
default_,
});
self.emit(&MInst::BrTable {
index,
tmp1: tmp,
targets,
});
InstOutput::default()
}
fn x_reg(&mut self, x: u8) -> Reg {
x_reg(x as usize)
}
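// Shifts a narrow integer value into the most-significant bits of a 64-bit
// register (a left shift by `64 - ty_bits`); an `I64` value is returned unchanged.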
fn shift_int_to_most_significant(&mut self, v: Reg, ty: Type) -> Reg {
assert!(ty.is_int() && ty.bits() <= 64);
if ty == I64 {
return v;
}
let tmp = self.temp_writable_reg(I64);
self.emit(&MInst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp,
rs: v,
imm12: Imm12::from_bits((64 - ty.bits()) as i16),
});
tmp.to_reg()
}
}
impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
#[inline]
fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
for i in list {
self.lower_ctx.emit(i.clone());
}
}
}
/// The main entry point for branch lowering with ISLE.
pub(crate) fn lower_branch(
lower_ctx: &mut Lower<MInst>,
triple: &Triple,
flags: &Flags,
isa_flags: &IsaFlags,
branch: Inst,
targets: &[MachLabel],
) -> Result<(), ()> {
lower_common(
lower_ctx,
triple,
flags,
isa_flags,
&[],
branch,
|cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
)
}
/// Construct the destination value registers according to `ty`.
fn construct_dest<F: std::ops::FnMut(Type) -> WritableReg>(
mut alloc: F,
ty: Type,
) -> WritableValueRegs {
if ty.is_bool() || ty.is_int() {
if ty.bits() == 128 {
WritableValueRegs::two(alloc(I64), alloc(I64))
} else {
WritableValueRegs::one(alloc(I64))
}
} else if ty.is_float() {
WritableValueRegs::one(alloc(F64))
} else {
unimplemented!("vector type not implemented.");
}
}

View File

@@ -0,0 +1,9 @@
// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of
// the generated ISLE source below because we include!() it. We must include!() it because its path
// depends on an environment variable; and also because of this, we can't do the `#[path = "..."]
// mod generated_code;` trick either.
#![allow(dead_code, unreachable_code, unreachable_patterns)]
#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]
#![allow(irrefutable_let_patterns)]
include!(concat!(env!("ISLE_DIR"), "/isle_riscv64.rs"));

View File

@@ -0,0 +1,36 @@
//! Lower a single Cranelift instruction into vcode.
use crate::ir::Inst as IRInst;
use crate::isa::riscv64::settings as riscv64_settings;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use crate::isa::riscv64::inst::*;
use target_lexicon::Triple;
/// Actually codegen an instruction's results into registers.
pub(crate) fn lower_insn_to_regs(
ctx: &mut Lower<Inst>,
insn: IRInst,
triple: &Triple,
flags: &Flags,
isa_flags: &riscv64_settings::Flags,
) -> CodegenResult<()> {
let outputs = insn_outputs(ctx, insn);
let ty = if outputs.len() > 0 {
Some(ctx.output_ty(insn, 0))
} else {
None
};
if let Ok(()) = super::lower::isle::lower(ctx, flags, triple, isa_flags, &outputs, insn) {
return Ok(());
}
unreachable!(
"not implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

View File

@@ -0,0 +1,246 @@
//! RISC-V 64-bit Instruction Set Architecture.
use crate::ir;
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::riscv64::settings as riscv_settings;
use crate::isa::{Builder as IsaBuilder, TargetIsa};
use crate::machinst::{
compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet,
TextSectionBuilder, VCode,
};
use crate::result::CodegenResult;
use crate::settings as shared_settings;
use alloc::{boxed::Box, vec::Vec};
use core::fmt;
use regalloc2::MachineEnv;
use target_lexicon::{Architecture, Triple};
mod abi;
pub(crate) mod inst;
mod lower;
mod lower_inst;
mod settings;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
use inst::crate_reg_eviroment;
use self::inst::EmitInfo;
/// A riscv64 backend.
pub struct Riscv64Backend {
triple: Triple,
flags: shared_settings::Flags,
isa_flags: riscv_settings::Flags,
mach_env: MachineEnv,
}
impl Riscv64Backend {
/// Create a new riscv64 backend with the given (shared) flags.
pub fn new_with_flags(
triple: Triple,
flags: shared_settings::Flags,
isa_flags: riscv_settings::Flags,
) -> Riscv64Backend {
let mach_env = crate_reg_eviroment(&flags);
Riscv64Backend {
triple,
flags,
isa_flags,
mach_env,
}
}
/// This performs lowering to VCode, register-allocates the code, computes block layout and
/// finalizes branches. The result is ready for binary emission.
fn compile_vcode(
&self,
func: &Function,
flags: shared_settings::Flags,
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone());
let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
compile::compile::<Riscv64Backend>(func, self, abi, &self.mach_env, emit_info, sigs)
}
}
impl TargetIsa for Riscv64Backend {
fn compile_function(
&self,
func: &Function,
want_disasm: bool,
) -> CodegenResult<CompiledCodeStencil> {
let flags = self.flags();
let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?;
let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info());
let frame_size = emit_result.frame_size;
let value_labels_ranges = emit_result.value_labels_ranges;
let buffer = emit_result.buffer.finish();
let sized_stackslot_offsets = emit_result.sized_stackslot_offsets;
let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets;
if let Some(disasm) = emit_result.disasm.as_ref() {
log::debug!("disassembly:\n{}", disasm);
}
Ok(CompiledCodeStencil {
buffer,
frame_size,
disasm: emit_result.disasm,
value_labels_ranges,
sized_stackslot_offsets,
dynamic_stackslot_offsets,
bb_starts: emit_result.bb_offsets,
bb_edges: emit_result.bb_edges,
alignment: emit_result.alignment,
})
}
fn name(&self) -> &'static str {
"riscv64"
}
fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 {
16
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn unsigned_add_overflow_condition(&self) -> IntCC {
IntCC::UnsignedGreaterThanOrEqual
}
#[cfg(feature = "unwind")]
fn emit_unwind_info(
&self,
result: &CompiledCode,
kind: crate::machinst::UnwindInfoKind,
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data().len(),
&mapper,
)?,
))
}
UnwindInfoKind::Windows => None,
_ => None,
})
}
#[cfg(feature = "unwind")]
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(inst::unwind::systemv::create_cie())
}
fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> {
Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs))
}
#[cfg(feature = "unwind")]
fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
}
fn function_alignment(&self) -> u32 {
4
}
}
impl fmt::Display for Riscv64Backend {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("MachBackend")
.field("name", &self.name())
.field("triple", &self.triple())
.field("flags", &format!("{}", self.flags()))
.finish()
}
}
/// Create a new `isa::Builder`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
match triple.architecture {
Architecture::Riscv64(..) => {}
_ => unreachable!(),
}
IsaBuilder {
triple,
setup: riscv_settings::builder(),
constructor: |triple, shared_flags, builder| {
let isa_flags = riscv_settings::Flags::new(&shared_flags, builder);
let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags);
Ok(Box::new(backend))
},
}
}
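// A minimal usage sketch (an assumption about the standard `IsaBuilder::finish`
// workflow, not part of this file):
//
//     let builder = isa_builder(Triple::from_str("riscv64").unwrap());
//     let shared = shared_settings::Flags::new(shared_settings::builder());
//     let isa = builder.finish(shared);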
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, Function, InstBuilder, Signature, UserFuncName};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;
#[test]
fn test_compile_function() {
let name = UserFuncName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = riscv_settings::Flags::new(&shared_flags, riscv_settings::builder());
let backend = Riscv64Backend::new_with_flags(
Triple::from_str("riscv64").unwrap(),
shared_flags,
isa_flags,
);
let buffer = backend.compile_function(&mut func, true).unwrap();
let code = buffer.buffer.data();
// 0: 000015b7 lui a1,0x1
// 4: 23458593 addi a1,a1,564 # 0x1234
// 8: 00b5053b addw a0,a0,a1
// c: 00008067 ret
let golden = vec![
0xb7, 0x15, 0x0, 0x0, 0x93, 0x85, 0x45, 0x23, 0x3b, 0x5, 0xb5, 0x0, 0x67, 0x80, 0x0,
0x0,
];
assert_eq!(code, &golden[..]);
}
}

View File

@@ -0,0 +1,8 @@
//! riscv64 Settings.
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined for the riscv64 ISA in the meta crate.
include!(concat!(env!("OUT_DIR"), "/settings-riscv64.rs"));

View File

@@ -0,0 +1,365 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f5(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v0, v2
v4 = load.i32 v3
return v4
}
; block0:
; sext.w a3,a1
; add a3,a0,a3
; lw a0,0(a3)
; ret
function %f6(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v2, v0
v4 = load.i32 v3
return v4
}
; block0:
; sext.w a3,a1
; add a3,a3,a0
; lw a0,0(a3)
; ret
function %f7(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = uextend.i64 v0
v3 = uextend.i64 v1
v4 = iadd.i64 v2, v3
v5 = load.i32 v4
return v5
}
; block0:
; uext.w a4,a0
; uext.w a5,a1
; add a4,a4,a5
; lw a0,0(a4)
; ret
function %f8(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iconst.i64 32
v4 = iadd.i64 v2, v3
v5 = iadd.i64 v4, v0
v6 = iadd.i64 v5, v5
v7 = load.i32 v6+4
return v7
}
; block0:
; sext.w a5,a1
; addi a5,a5,32
; add a5,a5,a0
; add a5,a5,a5
; lw a0,4(a5)
; ret
function %f9(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i64 48
v4 = iadd.i64 v0, v1
v5 = iadd.i64 v4, v2
v6 = iadd.i64 v5, v3
v7 = load.i32 v6
return v7
}
; block0:
; add a5,a0,a1
; add a5,a5,a2
; addi a5,a5,48
; lw a0,0(a5)
; ret
function %f10(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i64 4100
v4 = iadd.i64 v0, v1
v5 = iadd.i64 v4, v2
v6 = iadd.i64 v5, v3
v7 = load.i32 v6
return v7
}
; block0:
; add a6,a0,a1
; add a6,a6,a2
; lui a5,1
; addi a5,a5,4
; add t3,a6,a5
; lw a0,0(t3)
; ret
function %f10() -> i32 {
block0:
v1 = iconst.i64 1234
v2 = load.i32 v1
return v2
}
; block0:
; li t1,1234
; lw a0,0(t1)
; ret
function %f11(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 8388608 ;; Imm12: 0x800 << 12
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; block0:
; lui a1,2048
; add a3,a0,a1
; lw a0,0(a3)
; ret
function %f12(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 -4
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; block0:
; addi a1,a0,-4
; lw a0,0(a1)
; ret
function %f13(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 1000000000
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; block0:
; lui a1,244141
; addi a1,a1,2560
; add a4,a0,a1
; lw a0,0(a4)
; ret
function %f14(i32) -> i32 {
block0(v0: i32):
v1 = sextend.i64 v0
v2 = load.i32 v1
return v2
}
; block0:
; sext.w a1,a0
; lw a0,0(a1)
; ret
function %f15(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sextend.i64 v0
v3 = sextend.i64 v1
v4 = iadd.i64 v2, v3
v5 = load.i32 v4
return v5
}
; block0:
; sext.w a4,a0
; sext.w a5,a1
; add a4,a4,a5
; lw a0,0(a4)
; ret
function %f18(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i32 -4098
v6 = uextend.i64 v3
v5 = sload16.i32 v6+0
return v5
}
; block0:
; lui a3,1048575
; addi a3,a3,4094
; uext.w a6,a3
; lh a0,0(a6)
; ret
function %f19(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i32 4098
v6 = uextend.i64 v3
v5 = sload16.i32 v6+0
return v5
}
; block0:
; lui a3,1
; addi a3,a3,2
; uext.w a6,a3
; lh a0,0(a6)
; ret
function %f20(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i32 -4098
v6 = sextend.i64 v3
v5 = sload16.i32 v6+0
return v5
}
; block0:
; lui a3,1048575
; addi a3,a3,4094
; sext.w a6,a3
; lh a0,0(a6)
; ret
function %f21(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i32 4098
v6 = sextend.i64 v3
v5 = sload16.i32 v6+0
return v5
}
; block0:
; lui a3,1
; addi a3,a3,2
; sext.w a6,a3
; lh a0,0(a6)
; ret
function %i128(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0
store.i128 v1, v0
return v1
}
; block0:
; ld a1,0(a0)
; mv a3,a1
; ld a1,8(a0)
; mv a5,a3
; sd a5,0(a0)
; sd a1,8(a0)
; mv a0,a3
; ret
function %i128_imm_offset(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0+16
store.i128 v1, v0+16
return v1
}
; block0:
; ld a1,16(a0)
; mv a3,a1
; ld a1,24(a0)
; mv a5,a3
; sd a5,16(a0)
; sd a1,24(a0)
; mv a0,a3
; ret
function %i128_imm_offset_large(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0+504
store.i128 v1, v0+504
return v1
}
; block0:
; ld a1,504(a0)
; mv a3,a1
; ld a1,512(a0)
; mv a5,a3
; sd a5,504(a0)
; sd a1,512(a0)
; mv a0,a3
; ret
function %i128_imm_offset_negative_large(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0-512
store.i128 v1, v0-512
return v1
}
; block0:
; ld a1,-512(a0)
; mv a3,a1
; ld a1,-504(a0)
; mv a5,a3
; sd a5,-512(a0)
; sd a1,-504(a0)
; mv a0,a3
; ret
function %i128_add_offset(i64) -> i128 {
block0(v0: i64):
v1 = iadd_imm v0, 32
v2 = load.i128 v1
store.i128 v2, v1
return v2
}
; block0:
; addi a3,a0,32
; ld a0,0(a3)
; ld a1,8(a3)
; sd a0,0(a3)
; sd a1,8(a3)
; ret
function %i128_32bit_sextend_simple(i32) -> i128 {
block0(v0: i32):
v1 = sextend.i64 v0
v2 = load.i128 v1
store.i128 v2, v1
return v2
}
; block0:
; sext.w a3,a0
; ld a0,0(a3)
; ld a1,8(a3)
; sd a0,0(a3)
; sd a1,8(a3)
; ret
function %i128_32bit_sextend(i64, i32) -> i128 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v0, v2
v4 = iadd_imm.i64 v3, 24
v5 = load.i128 v4
store.i128 v5, v4
return v5
}
; block0:
; sext.w a6,a1
; add a6,a0,a6
; addi a6,a6,24
; ld a0,0(a6)
; ld a1,8(a6)
; sd a0,0(a6)
; sd a1,8(a6)
; ret

View File

@@ -0,0 +1,509 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iadd.i64 v0, v1
return v2
}
; block0:
; add a0,a0,a1
; ret
function %f2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = isub.i64 v0, v1
return v2
}
; block0:
; sub a0,a0,a1
; ret
function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = imul.i64 v0, v1
return v2
}
; block0:
; mul a0,a0,a1
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = umulhi.i64 v0, v1
return v2
}
; block0:
; mulhu a0,a0,a1
; ret
function %f5(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = smulhi.i64 v0, v1
return v2
}
; block0:
; mulh a0,a0,a1
; ret
function %f6(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; li a2,-1
; li a3,1
; slli a5,a3,63
; eq a7,a2,a1##ty=i64
; eq t4,a5,a0##ty=i64
; and t1,a7,t4
; trap_if t1,int_ovf
; trap_ifc int_divz##(zero eq a1)
; div a0,a0,a1
; ret
function %f7(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; li a1,2
; li a2,-1
; li a4,1
; slli a6,a4,63
; eq t3,a2,a1##ty=i64
; eq t0,a6,a0##ty=i64
; and t2,t3,t0
; trap_if t2,int_ovf
; li a2,2
; trap_ifc int_divz##(zero eq a2)
; li a5,2
; div a0,a0,a5
; ret
function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = udiv.i64 v0, v1
return v2
}
; block0:
; trap_ifc int_divz##(zero eq a1)
; divu a0,a0,a1
; ret
function %f9(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = udiv.i64 v0, v1
return v2
}
; block0:
; li a1,2
; trap_ifc int_divz##(zero eq a1)
; li a3,2
; divu a0,a0,a3
; ret
function %f10(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem.i64 v0, v1
return v2
}
; block0:
; trap_ifc int_divz##(zero eq a1)
; rem a0,a0,a1
; ret
function %f11(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = urem.i64 v0, v1
return v2
}
; block0:
; trap_ifc int_divz##(zero eq a1)
; remu a0,a0,a1
; ret
function %f12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sdiv.i32 v0, v1
return v2
}
; block0:
; sext.w a2,a0
; sext.w a3,a1
; li a5,-1
; li a7,1
; slli t4,a7,63
; slli t1,a2,32
; eq a0,a5,a3##ty=i32
; eq a4,t4,t1##ty=i32
; and a4,a0,a4
; trap_if a4,int_ovf
; trap_ifc int_divz##(zero eq a3)
; divw a0,a2,a3
; ret
function %f13(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = sdiv.i32 v0, v1
return v2
}
; block0:
; sext.w a0,a0
; li a2,2
; sext.w a4,a2
; li a6,-1
; li t3,1
; slli t0,t3,63
; slli t2,a0,32
; eq a1,a6,a4##ty=i32
; eq a3,t0,t2##ty=i32
; and a5,a1,a3
; trap_if a5,int_ovf
; trap_ifc int_divz##(zero eq a4)
; divw a0,a0,a4
; ret
function %f14(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = udiv.i32 v0, v1
return v2
}
; block0:
; uext.w a1,a1
; trap_ifc int_divz##(zero eq a1)
; uext.w a4,a0
; divuw a0,a4,a1
; ret
function %f15(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = udiv.i32 v0, v1
return v2
}
; block0:
; li a1,2
; uext.w a2,a1
; trap_ifc int_divz##(zero eq a2)
; uext.w a5,a0
; divuw a0,a5,a2
; ret
function %f16(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem.i32 v0, v1
return v2
}
; block0:
; sext.w a1,a1
; trap_ifc int_divz##(zero eq a1)
; remw a0,a0,a1
; ret
function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = urem.i32 v0, v1
return v2
}
; block0:
; uext.w a1,a1
; trap_ifc int_divz##(zero eq a1)
; remuw a0,a0,a1
; ret
function %f18(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band.i64 v0, v1
return v2
}
; block0:
; and a0,a0,a1
; ret
function %f19(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor.i64 v0, v1
return v2
}
; block0:
; or a0,a0,a1
; ret
function %f20(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor.i64 v0, v1
return v2
}
; block0:
; xor a0,a0,a1
; ret
function %f21(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not.i64 v0, v1
return v2
}
; block0:
; not a1,a1
; and a0,a0,a1
; ret
function %f22(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not.i64 v0, v1
return v2
}
; block0:
; not a1,a1
; or a0,a0,a1
; ret
function %f23(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not.i64 v0, v1
return v2
}
; block0:
; not a1,a1
; xor a0,a0,a1
; ret
function %f24(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bnot.i64 v0
return v2
}
; block0:
; not a0,a0
; ret
function %f25(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = iconst.i32 53
v3 = ishl.i32 v0, v2
v4 = isub.i32 v1, v3
return v4
}
; block0:
; slliw a2,a0,53
; subw a0,a1,a2
; ret
function %f26(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd.i32 v0, v1
return v2
}
; block0:
; li a1,-1
; addw a0,a0,a1
; ret
function %f27(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = isub.i32 v0, v1
return v2
}
; block0:
; li a1,-1
; subw a0,a0,a1
; ret
function %f28(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -1
v2 = isub.i64 v0, v1
return v2
}
; block0:
; li a1,-1
; sub a0,a0,a1
; ret
function %f29(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 1
v2 = ineg v1
return v2
}
; block0:
; li a0,1
; sub a0,zero,a0
; ret
function %add_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = iadd v0, v1
return v2
}
; block0:
; add a0,a0,a2
; sltu a6,a0,a2
; add t3,a1,a3
; add a1,t3,a6
; ret
function %sub_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = isub v0, v1
return v2
}
; block0:
; sub a4,a0,a2
; mv t4,a4
; sltu a6,a0,t4
; sub t3,a1,a3
; sub a1,t3,a6
; mv a0,a4
; ret
function %add_mul_2(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = iadd v3, v0
return v4
}
; block0:
; mulw a3,a1,a2
; addw a0,a3,a0
; ret
function %msub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = isub v0, v3
return v4
}
; block0:
; mulw a3,a1,a2
; subw a0,a0,a3
; ret
function %msub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = imul v1, v2
v4 = isub v0, v3
return v4
}
; block0:
; mul a3,a1,a2
; sub a0,a0,a3
; ret
function %imul_sub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = isub v3, v0
return v4
}
; block0:
; mulw a3,a1,a2
; subw a0,a3,a0
; ret
function %imul_sub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = imul v1, v2
v4 = isub v3, v0
return v4
}
; block0:
; mul a3,a1,a2
; sub a0,a3,a0
; ret
function %srem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = srem.i64 v0, v1
return v2
}
; block0:
; li a1,2
; trap_ifc int_divz##(zero eq a1)
; li a3,2
; rem a0,a0,a3
; ret
function %urem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = urem.i64 v0, v1
return v2
}
; block0:
; li a1,2
; trap_ifc int_divz##(zero eq a1)
; li a3,2
; remu a0,a0,a3
; ret
function %sdiv_minus_one(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -1
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; li a1,-1
; li a2,-1
; li a4,1
; slli a6,a4,63
; eq t3,a2,a1##ty=i64
; eq t0,a6,a0##ty=i64
; and t2,t3,t0
; trap_if t2,int_ovf
; li a2,-1
; trap_ifc int_divz##(zero eq a2)
; li a5,-1
; div a0,a0,a5
; ret

View File

@@ -0,0 +1,210 @@
test compile precise-output
set unwind_info=false
target riscv64
function %atomic_rmw_add_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 add v0, v1
return
}
; block0:
; amoadd.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_add_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 add v0, v1
return
}
; block0:
; amoadd.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_sub_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 sub v0, v1
return
}
; block0:
; sub a1,zero,a1
; amoadd.d.aqrl a2,a1,(a0)
; ret
function %atomic_rmw_sub_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 sub v0, v1
return
}
; block0:
; sub a1,zero,a1
; amoadd.w.aqrl a2,a1,(a0)
; ret
function %atomic_rmw_and_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 and v0, v1
return
}
; block0:
; amoand.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_and_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 and v0, v1
return
}
; block0:
; amoand.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_nand_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 nand v0, v1
return
}
; block0:
; mv a3,a0
; mv a2,a1
; atomic_rmw.i64 nand a0,a2,(a3)##t0=a1 offset=zero
; ret
function %atomic_rmw_nand_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 nand v0, v1
return
}
; block0:
; mv a3,a0
; mv a2,a1
; atomic_rmw.i32 nand a0,a2,(a3)##t0=a1 offset=zero
; ret
function %atomic_rmw_or_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 or v0, v1
return
}
; block0:
; amoor.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_or_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 or v0, v1
return
}
; block0:
; amoor.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_xor_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 xor v0, v1
return
}
; block0:
; amoxor.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_xor_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 xor v0, v1
return
}
; block0:
; amoxor.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_smax_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 smax v0, v1
return
}
; block0:
; amomax.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_smax_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 smax v0, v1
return
}
; block0:
; amomax.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_umax_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 umax v0, v1
return
}
; block0:
; amomaxu.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_umax_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 umax v0, v1
return
}
; block0:
; amomaxu.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_smin_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 smin v0, v1
return
}
; block0:
; amomin.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_smin_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 smin v0, v1
return
}
; block0:
; amomin.w.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_umin_i64(i64, i64) {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 umin v0, v1
return
}
; block0:
; amominu.d.aqrl a0,a1,(a0)
; ret
function %atomic_rmw_umin_i32(i64, i32) {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 umin v0, v1
return
}
; block0:
; amominu.w.aqrl a0,a1,(a0)
; ret

View File

@@ -0,0 +1,36 @@
test compile precise-output
set unwind_info=false
target riscv64
function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i64 v0
return v1
}
; block0:
; atomic_load.i64 a0,(a0)
; ret
function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_load.i32 v0
return v1
}
; block0:
; atomic_load.i32 a0,(a0)
; ret
function %atomic_load_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i32 v0
v2 = uextend.i64 v1
return v2
}
; block0:
; atomic_load.i32 a1,(a0)
; uext.w a0,a1
; ret

View File

@@ -0,0 +1,76 @@
test compile precise-output
set unwind_info=false
target riscv64
function %atomic_store_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store.i64 v0, v1
return
}
; block0:
; atomic_store.i64 a0,(a1)
; ret
function %atomic_store_i64_sym(i64) {
gv0 = symbol colocated %sym
block0(v0: i64):
v1 = symbol_value.i64 gv0
atomic_store.i64 v0, v1
return
}
; block0:
; load_sym t2,%sym+0
; atomic_store.i64 a0,(t2)
; ret
function %atomic_store_imm_i64(i64) {
block0(v0: i64):
v1 = iconst.i64 12345
atomic_store.i64 v1, v0
return
}
; block0:
; lui t2,3
; addi t2,t2,57
; atomic_store.i64 t2,(a0)
; ret
function %atomic_store_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store.i32 v0, v1
return
}
; block0:
; atomic_store.i32 a0,(a1)
; ret
function %atomic_store_i32_sym(i32) {
gv0 = symbol colocated %sym
block0(v0: i32):
v1 = symbol_value.i64 gv0
atomic_store.i32 v0, v1
return
}
; block0:
; load_sym t2,%sym+0
; atomic_store.i32 a0,(t2)
; ret
function %atomic_store_imm_i32(i64) {
block0(v0: i64):
v1 = iconst.i32 12345
atomic_store.i32 v1, v0
return
}
; block0:
; lui t2,3
; addi t2,t2,57
; atomic_store.i32 t2,(a0)
; ret

View File

@@ -0,0 +1,929 @@
test compile precise-output
set unwind_info=false
target riscv64
function %a(i8) -> i8 {
block0(v0: i8):
v1 = bitrev v0
return v1
}
; block0:
; brev8 a4,a0##tmp=a3 tmp2=a1 step=a2 ty=i8
; mv a0,a4
; ret
function %a(i16) -> i16 {
block0(v0: i16):
v1 = bitrev v0
return v1
}
; block0:
; mv t3,a0
; brev8 a3,t3##tmp=a0 tmp2=a1 step=a2 ty=i16
; rev8 a5,a3##step=a7 tmp=a6
; srli a0,a5,48
; ret
function %a(i32) -> i32 {
block0(v0: i32):
v1 = bitrev v0
return v1
}
; block0:
; mv t3,a0
; brev8 a3,t3##tmp=a0 tmp2=a1 step=a2 ty=i32
; rev8 a5,a3##step=a7 tmp=a6
; srli a0,a5,32
; ret
function %a(i64) -> i64 {
block0(v0: i64):
v1 = bitrev v0
return v1
}
; block0:
; rev8 a3,a0##step=a2 tmp=a1
; brev8 a0,a3##tmp=a4 tmp2=a5 step=a6 ty=i64
; ret
function %a(i128) -> i128 {
block0(v0: i128):
v1 = bitrev v0
return v1
}
; block0:
; rev8 a2,a0##step=a4 tmp=a3
; brev8 t4,a2##tmp=a6 tmp2=a7 step=t3 ty=i64
; rev8 t1,a1##step=a0 tmp=t2
; brev8 a0,t1##tmp=a2 tmp2=a3 step=a4 ty=i64
; mv a1,t4
; ret
function %b(i8) -> i8 {
block0(v0: i8):
v1 = clz v0
return v1
}
; block0:
; clz a3,a0##ty=i8 tmp=a2 step=a1
; mv a0,a3
; ret
function %b(i16) -> i16 {
block0(v0: i16):
v1 = clz v0
return v1
}
; block0:
; clz a3,a0##ty=i16 tmp=a2 step=a1
; mv a0,a3
; ret
function %b(i32) -> i32 {
block0(v0: i32):
v1 = clz v0
return v1
}
; block0:
; clz a3,a0##ty=i32 tmp=a2 step=a1
; mv a0,a3
; ret
function %b(i64) -> i64 {
block0(v0: i64):
v1 = clz v0
return v1
}
; block0:
; clz a3,a0##ty=i64 tmp=a2 step=a1
; mv a0,a3
; ret
function %b(i128) -> i128 {
block0(v0: i128):
v1 = clz v0
return v1
}
; block0:
; clz a4,a1##ty=i64 tmp=a2 step=a3
; clz t3,a0##ty=i64 tmp=a6 step=a7
; li t0,64
; select_reg t2,t3,zero##condition=(t0 eq a4)
; add a0,a4,t2
; mv a1,zero
; ret
function %c(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; block0:
; sext.b a1,a0
; not a2,a0
; select_reg a4,a2,a0##condition=(a1 slt zero)
; clz t3,a4##ty=i8 tmp=a6 step=a7
; addi a0,t3,-1
; ret
function %c(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; block0:
; sext.h a1,a0
; not a2,a0
; select_reg a4,a2,a0##condition=(a1 slt zero)
; clz t3,a4##ty=i16 tmp=a6 step=a7
; addi a0,t3,-1
; ret
function %c(i32) -> i32 {
block0(v0: i32):
v1 = cls v0
return v1
}
; block0:
; sext.w a1,a0
; not a2,a0
; select_reg a4,a2,a0##condition=(a1 slt zero)
; clz t3,a4##ty=i32 tmp=a6 step=a7
; addi a0,t3,-1
; ret
function %c(i64) -> i64 {
block0(v0: i64):
v1 = cls v0
return v1
}
; block0:
; not a1,a0
; select_reg a2,a1,a0##condition=(a0 slt zero)
; clz a6,a2##ty=i64 tmp=a4 step=a5
; addi a0,a6,-1
; ret
function %c(i128) -> i128 {
block0(v0: i128):
v1 = cls v0
return v1
}
; block0:
; not a2,a0
; select_reg a4,a2,a0##condition=(a1 slt zero)
; not a6,a1
; select_reg t3,a6,a1##condition=(a1 slt zero)
; clz t2,t3##ty=i64 tmp=t0 step=t1
; clz a3,a4##ty=i64 tmp=a1 step=a2
; li a5,64
; select_reg a7,a3,zero##condition=(a5 eq t2)
; add t4,t2,a7
; addi a0,t4,-1
; mv a1,zero
; ret
function %d(i8) -> i8 {
block0(v0: i8):
v1 = ctz v0
return v1
}
; block0:
; ctz a3,a0##ty=i8 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i16) -> i16 {
block0(v0: i16):
v1 = ctz v0
return v1
}
; block0:
; ctz a3,a0##ty=i16 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i32) -> i32 {
block0(v0: i32):
v1 = ctz v0
return v1
}
; block0:
; ctz a3,a0##ty=i32 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i64) -> i64 {
block0(v0: i64):
v1 = ctz v0
return v1
}
; block0:
; ctz a3,a0##ty=i64 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i128) -> i128 {
block0(v0: i128):
v1 = ctz v0
return v1
}
; block0:
; ctz a4,a0##ty=i64 tmp=a2 step=a3
; ctz t3,a1##ty=i64 tmp=a6 step=a7
; li t0,64
; select_reg t2,t3,zero##condition=(t0 eq a4)
; add a0,a4,t2
; mv a1,zero
; ret
function %d(i128) -> i128 {
block0(v0: i128):
v1 = popcnt v0
return v1
}
; block0:
; popcnt a4,a0##ty=i64 tmp=a2 step=a3
; popcnt t3,a1##ty=i64 tmp=a6 step=a7
; add a0,a4,t3
; mv a1,zero
; ret
function %d(i64) -> i64 {
block0(v0: i64):
v1 = popcnt v0
return v1
}
; block0:
; popcnt a3,a0##ty=i64 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i32) -> i32 {
block0(v0: i32):
v1 = popcnt v0
return v1
}
; block0:
; popcnt a3,a0##ty=i32 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i16) -> i16 {
block0(v0: i16):
v1 = popcnt v0
return v1
}
; block0:
; popcnt a3,a0##ty=i16 tmp=a2 step=a1
; mv a0,a3
; ret
function %d(i8) -> i8 {
block0(v0: i8):
v1 = popcnt v0
return v1
}
; block0:
; popcnt a3,a0##ty=i8 tmp=a2 step=a1
; mv a0,a3
; ret
function %bextend_b8() -> b32 {
block0:
v1 = bconst.b8 true
v2 = bextend.b32 v1
return v2
}
; block0:
; li a0,-1
; ret
function %bextend_b1() -> b32 {
block0:
v1 = bconst.b1 true
v2 = bextend.b32 v1
return v2
}
; block0:
; li a0,-1
; ret
function %bnot_i32(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; ret
function %bnot_i64(i64) -> i64 {
block0(v0: i64):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; ret
function %bnot_i64_with_shift(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = ishl.i64 v0, v1
v3 = bnot v2
return v3
}
; block0:
; slli a1,a0,3
; not a0,a1
; ret
function %bnot_i128(i128) -> i128 {
block0(v0: i128):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; not a1,a1
; ret
function %band_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a1
; ret
function %band_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a1
; ret
function %band_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a2
; and a1,a1,a3
; ret
function %band_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v0, v1
return v2
}
; block0:
; andi a0,a0,3
; ret
function %band_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v1, v0
return v2
}
; block0:
; andi a0,a0,3
; ret
function %band_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v0, v3
return v4
}
; block0:
; slli a2,a1,3
; and a0,a0,a2
; ret
function %band_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v3, v0
return v4
}
; block0:
; slli a2,a1,3
; and a0,a2,a0
; ret
function %bor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a1
; ret
function %bor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a1
; ret
function %bor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a2
; or a1,a1,a3
; ret
function %bor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v0, v1
return v2
}
; block0:
; ori a0,a0,3
; ret
function %bor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v1, v0
return v2
}
; block0:
; ori a0,a0,3
; ret
function %bor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v0, v3
return v4
}
; block0:
; slli a2,a1,3
; or a0,a0,a2
; ret
function %bor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v3, v0
return v4
}
; block0:
; slli a2,a1,3
; or a0,a2,a0
; ret
function %bxor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a1
; ret
function %bxor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a1
; ret
function %bxor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a2
; xor a1,a1,a3
; ret
function %bxor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v0, v1
return v2
}
; block0:
; xori a0,a0,3
; ret
function %bxor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v1, v0
return v2
}
; block0:
; xori a0,a0,3
; ret
function %bxor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v0, v3
return v4
}
; block0:
; slli a2,a1,3
; xor a0,a0,a2
; ret
function %bxor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v3, v0
return v4
}
; block0:
; slli a2,a1,3
; xor a0,a2,a0
; ret
function %band_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band_not v0, v1
return v2
}
; block0:
; not a1,a1
; and a0,a0,a1
; ret
function %band_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not v0, v1
return v2
}
; block0:
; not a1,a1
; and a0,a0,a1
; ret
function %band_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band_not v0, v1
return v2
}
; block0:
; not a4,a2
; and a0,a0,a4
; not t3,a3
; and a1,a1,t3
; ret
function %band_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = band_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; and a0,a0,a2
; ret
function %band_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = band_not v0, v3
return v4
}
; block0:
; slli a3,a1,4
; not a2,a3
; and a0,a0,a2
; ret
function %bor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a1,a1
; or a0,a0,a1
; ret
function %bor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a1,a1
; or a0,a0,a1
; ret
function %bor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a4,a2
; or a0,a0,a4
; not t3,a3
; or a1,a1,t3
; ret
function %bor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bor_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; or a0,a0,a2
; ret
function %bor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bor_not v0, v3
return v4
}
; block0:
; slli a3,a1,4
; not a2,a3
; or a0,a0,a2
; ret
function %bxor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a1,a1
; xor a0,a0,a1
; ret
function %bxor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a1,a1
; xor a0,a0,a1
; ret
function %bxor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a4,a2
; xor a0,a0,a4
; not t3,a3
; xor a1,a1,t3
; ret
function %bxor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bxor_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; xor a0,a0,a2
; ret
function %bxor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bxor_not v0, v3
return v4
}
; block0:
; slli a3,a1,4
; not a2,a3
; xor a0,a0,a2
; ret
function %ishl_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,127
; li a5,128
; sub a5,a5,a3
; sll t3,a0,a3
; srl t0,a0,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; sll a1,a1,a3
; or a4,t2,a1
; li a5,64
; select_reg a0,zero,t3##condition=(a3 uge a5)
; select_reg a1,t3,a4##condition=(a3 uge a5)
; ret
function %ishl_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; andi a4,a2,127
; li a6,128
; sub a6,a6,a4
; sll t4,a0,a4
; srl t1,a0,a6
; select_reg a0,zero,t1##condition=(a4 eq zero)
; sll a2,a1,a4
; or a5,a0,a2
; li a6,64
; select_reg a0,zero,t4##condition=(a4 uge a6)
; select_reg a1,t4,a5##condition=(a4 uge a6)
; ret
function %ushr_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,127
; li a5,128
; sub a5,a5,a3
; sll t3,a1,a5
; select_reg t0,zero,t3##condition=(a3 eq zero)
; srl t2,a0,a3
; or a2,t0,t2
; li a4,64
; srl a5,a1,a3
; select_reg a0,a5,a2##condition=(a3 uge a4)
; select_reg a1,zero,a5##condition=(a3 uge a4)
; ret
function %ushr_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; andi a4,a2,127
; li a6,128
; sub a6,a6,a4
; sll t4,a1,a6
; select_reg t1,zero,t4##condition=(a4 eq zero)
; srl a0,a0,a4
; or a2,t1,a0
; li a5,64
; srl a6,a1,a4
; select_reg a0,a6,a2##condition=(a4 uge a5)
; select_reg a1,zero,a6##condition=(a4 uge a5)
; ret
function %sshr_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,127
; li a5,128
; sub a5,a5,a3
; sll t3,a1,a5
; select_reg t0,zero,t3##condition=(a3 eq zero)
; srl t2,a0,a3
; or a2,t0,t2
; li a4,64
; sra a5,a1,a3
; li a7,-1
; select_reg t4,a7,zero##condition=(a1 slt zero)
; select_reg a0,a5,a2##condition=(a3 uge a4)
; select_reg a1,t4,a5##condition=(a3 uge a4)
; ret
function %sshr_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; andi a4,a2,127
; li a6,128
; sub a6,a6,a4
; sll t4,a1,a6
; select_reg t1,zero,t4##condition=(a4 eq zero)
; srl a0,a0,a4
; or a2,t1,a0
; li a5,64
; sra a6,a1,a4
; li t3,-1
; select_reg t0,t3,zero##condition=(a1 slt zero)
; select_reg a0,a6,a2##condition=(a4 uge a5)
; select_reg a1,t0,a6##condition=(a4 uge a5)
; ret
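The i128 shift lowerings above compose a 128-bit shift from two 64-bit shifts plus select_reg fixups: the masked amount picks between the "amount < 64" combination and the "amount >= 64" case, and a zero amount is special-cased because the wrap-around term would otherwise shift by 64. A minimal Rust sketch of the same semantics (illustrative only, not the backend's lowering code; the function name is made up):

fn ishl_i128(lo: u64, hi: u64, amt: u32) -> (u64, u64) {
    // Mirror of `andi a3,a2,127`: only the low 7 bits of the amount matter.
    let amt = amt & 127;
    if amt == 0 {
        // The `select_reg ... condition=(a3 eq zero)` guard: no wrap-around term.
        (lo, hi)
    } else if amt < 64 {
        // Shift both halves and carry the low half's top bits into the high half.
        (lo << amt, (hi << amt) | (lo >> (64 - amt)))
    } else {
        // Amounts 64..=127: the low half moves entirely into the high half.
        (0, lo << (amt - 64))
    }
}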

View File

@@ -0,0 +1,22 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(i64, i64) -> i64 {
sig0 = (i64) -> i64
block0(v0: i64, v1: i64):
v2 = call_indirect.i64 sig0, v1(v0)
return v2
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; callind a1
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret

View File

@@ -0,0 +1,424 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f1(i64) -> i64 {
fn0 = %g(i64) -> i64
block0(v0: i64):
v1 = call fn0(v0)
return v1
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; load_sym a1,%g+0
; callind a1
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f2(i32) -> i64 {
fn0 = %g(i32 uext) -> i64
block0(v0: i32):
v1 = call fn0(v0)
return v1
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; uext.w a0,a0
; load_sym a3,%g+0
; callind a3
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f3(i32) -> i32 uext {
block0(v0: i32):
return v0
}
; block0:
; uext.w a0,a0
; ret
function %f4(i32) -> i64 {
fn0 = %g(i32 sext) -> i64
block0(v0: i32):
v1 = call fn0(v0)
return v1
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; sext.w a0,a0
; load_sym a3,%g+0
; callind a3
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f5(i32) -> i32 sext {
block0(v0: i32):
return v0
}
; block0:
; sext.w a0,a0
; ret
function %f6(i8) -> i64 {
fn0 = %g(i32, i32, i32, i32, i32, i32, i32, i32, i8 sext) -> i64
block0(v0: i8):
v1 = iconst.i32 42
v2 = call fn0(v1, v1, v1, v1, v1, v1, v1, v1, v0)
return v2
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv t3,a0
; add sp,-16
; virtual_sp_offset_adj +16
; li a0,42
; li a1,42
; li a2,42
; li a3,42
; li a4,42
; li a5,42
; li a6,42
; li a7,42
; sext.b t3,t3
; sd t3,0(sp)
; load_sym t4,%g+0
; callind t4
; add sp,+16
; virtual_sp_offset_adj -16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f7(i8) -> i32, i32, i32, i32, i32, i32, i32, i32, i8 sext {
block0(v0: i8):
v1 = iconst.i32 42
return v1, v1, v1, v1, v1, v1, v1, v1, v0
}
; block0:
; mv a7,a0
; mv a6,a1
; li a0,42
; li a1,42
; li a2,42
; li a5,42
; li t3,42
; li t1,42
; li a3,42
; li a4,42
; mv t2,a7
; mv t0,a6
; sw a2,0(t0)
; sw a5,8(t0)
; sw t3,16(t0)
; sw t1,24(t0)
; sw a3,32(t0)
; sw a4,40(t0)
; sext.b t2,t2
; sd t2,48(t0)
; ret
function %f8() {
fn0 = %g0() -> f32
fn1 = %g1() -> f64
fn2 = %g2()
fn3 = %g3(f32)
fn4 = %g4(f64)
block0:
v0 = call fn0()
v1 = call fn1()
v2 = call fn1()
call fn2()
call fn3(v0)
call fn4(v1)
call fn4(v2)
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; fsd fs2,-8(sp)
; fsd fs3,-16(sp)
; fsd fs11,-24(sp)
; add sp,-32
; block0:
; load_sym a6,%g0+0
; callind a6
; fmv.d fs11,fa0
; load_sym a6,%g1+0
; callind a6
; fmv.d fs2,fa0
; load_sym a6,%g1+0
; callind a6
; fmv.d fs3,fa0
; load_sym a6,%g2+0
; callind a6
; load_sym a7,%g3+0
; fmv.d fa0,fs11
; callind a7
; load_sym t3,%g4+0
; fmv.d fa0,fs2
; callind t3
; load_sym t4,%g4+0
; fmv.d fa0,fs3
; callind t4
; add sp,+32
; fld fs2,-8(sp)
; fld fs3,-16(sp)
; fld fs11,-24(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f11(i128, i64) -> i64 {
block0(v0: i128, v1: i64):
v2, v3 = isplit v0
return v3
}
; block0:
; mv a2,a0
; mv a0,a1
; ret
function %f11_call(i64) -> i64 {
fn0 = %f11(i128, i64) -> i64
block0(v0: i64):
v1 = iconst.i64 42
v2 = iconcat v1, v0
v3 = call fn0(v2, v1)
return v3
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv a6,a0
; li a0,42
; mv a1,a6
; li a2,42
; load_sym a6,%f11+0
; callind a6
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f12(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2, v3 = isplit v1
return v2
}
; block0:
; mv a0,a1
; ret
function %f12_call(i64) -> i64 {
fn0 = %f12(i64, i128) -> i64
block0(v0: i64):
v1 = iconst.i64 42
v2 = iconcat v0, v1
v3 = call fn0(v1, v2)
return v3
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv a1,a0
; li a2,42
; li a0,42
; load_sym a6,%f12+0
; callind a6
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f13(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2, v3 = isplit v1
return v2
}
; block0:
; mv a0,a1
; ret
function %f13_call(i64) -> i64 {
fn0 = %f13(i64, i128) -> i64
block0(v0: i64):
v1 = iconst.i64 42
v2 = iconcat v0, v1
v3 = call fn0(v1, v2)
return v3
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv a1,a0
; li a2,42
; li a0,42
; load_sym a6,%f13+0
; callind a6
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f14(i128, i128, i128, i64, i128) -> i128 {
block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128):
return v4
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; ld a1,16(fp)
; mv a0,a7
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f14_call(i128, i64) -> i128 {
fn0 = %f14(i128, i128, i128, i64, i128) -> i128
block0(v0: i128, v1: i64):
v2 = call fn0(v0, v0, v0, v1, v0)
return v2
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv a7,a0
; mv a6,a2
; add sp,-16
; virtual_sp_offset_adj +16
; sd a1,0(sp)
; mv a5,a1
; load_sym t3,%f14+0
; mv a1,a5
; mv a3,a5
; mv a0,a7
; mv a2,a7
; mv a4,a7
; callind t3
; add sp,+16
; virtual_sp_offset_adj -16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f15(i128, i128, i128, i64, i128) -> i128{
block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128):
return v4
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; ld a1,16(fp)
; mv a0,a7
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f15_call(i128, i64) -> i128 {
fn0 = %f15(i128, i128, i128, i64, i128) -> i128
block0(v0: i128, v1: i64):
v2 = call fn0(v0, v0, v0, v1, v0)
return v2
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; mv a7,a0
; mv a6,a2
; add sp,-16
; virtual_sp_offset_adj +16
; sd a1,0(sp)
; mv a5,a1
; load_sym t3,%f15+0
; mv a1,a5
; mv a3,a5
; mv a0,a7
; mv a2,a7
; mv a4,a7
; callind t3
; add sp,+16
; virtual_sp_offset_adj -16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f16() -> i32, i32 {
block0:
v0 = iconst.i32 0
v1 = iconst.i32 1
return v0, v1
}
; block0:
; li a0,0
; li a1,1
; ret

View File

@@ -0,0 +1,391 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(i64, i64) -> b1 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
return v2
}
; block0:
; eq a0,a0,a1##ty=i64
; ret
function %icmp_eq_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp eq v0, v1
return v2
}
; block0:
; eq a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_ne_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp ne v0, v1
return v2
}
; block0:
; ne a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_slt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp slt v0, v1
return v2
}
; block0:
; slt a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_ult_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp ult v0, v1
return v2
}
; block0:
; ult a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_sle_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp sle v0, v1
return v2
}
; block0:
; sle a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_ule_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp ule v0, v1
return v2
}
; block0:
; ule a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_sgt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp sgt v0, v1
return v2
}
; block0:
; sgt a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_ugt_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp ugt v0, v1
return v2
}
; block0:
; ugt a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_sge_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp sge v0, v1
return v2
}
; block0:
; sge a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %icmp_uge_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
v2 = icmp uge v0, v1
return v2
}
; block0:
; uge a0,[a0,a1],[a2,a3]##ty=i128
; ret
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ifcmp v0, v1
brif eq v2, block1
jump block2
block1:
v4 = iconst.i64 1
return v4
block2:
v5 = iconst.i64 2
return v5
}
; block0:
; eq a3,a0,a1##ty=i64
; bne a3,zero,taken(label1),not_taken(label2)
; block1:
; li a0,1
; ret
; block2:
; li a0,2
; ret
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ifcmp v0, v1
brif eq v2, block1
jump block1
block1:
v4 = iconst.i64 1
return v4
}
; block0:
; eq a2,a0,a1##ty=i64
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; li a0,1
; ret
function %i128_brz(i128){
block0(v0: i128):
brz v0, block1
jump block1
block1:
nop
return
}
; block0:
; bne a1,zero,taken(label2),not_taken(0)
; beq a0,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_brnz(i128){
block0(v0: i128):
brnz v0, block1
jump block1
block1:
nop
return
}
; block0:
; bne a1,zero,taken(label1),not_taken(0)
; bne a0,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_eq(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp eq v0, v1, block1
jump block1
block1:
return
}
; block0:
; eq a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_ne(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp ne v0, v1, block1
jump block1
block1:
return
}
; block0:
; ne a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_slt(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp slt v0, v1, block1
jump block1
block1:
return
}
; block0:
; slt a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_ult(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp ult v0, v1, block1
jump block1
block1:
return
}
; block0:
; ult a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_sle(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp sle v0, v1, block1
jump block1
block1:
return
}
; block0:
; sle a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_ule(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp ule v0, v1, block1
jump block1
block1:
return
}
; block0:
; ule a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_sgt(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp sgt v0, v1, block1
jump block1
block1:
return
}
; block0:
; sgt a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_ugt(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp ugt v0, v1, block1
jump block1
block1:
return
}
; block0:
; ugt a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_sge(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp sge v0, v1, block1
jump block1
block1:
return
}
; block0:
; sge a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret
function %i128_bricmp_uge(i128, i128) {
block0(v0: i128, v1: i128):
br_icmp uge v0, v1, block1
jump block1
block1:
return
}
; block0:
; uge a2,[a0,a1],[a2,a3]##ty=i128
; bne a2,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret

View File

@@ -0,0 +1,86 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(i8, i64, i64) -> i64 {
block0(v0: i8, v1: i64, v2: i64):
v3 = iconst.i8 42
v4 = ifcmp v0, v3
v5 = selectif.i64 eq v4, v1, v2
return v5
}
; block0:
; li a3,42
; uext.b a5,a0
; uext.b a7,a3
; eq t4,a5,a7##ty=i8
; selectif a0,a1,a2##test=t4
; ret
function %g(i8) -> b1 {
block0(v0: i8):
v3 = iconst.i8 42
v4 = ifcmp v0, v3
v5 = trueif eq v4
return v5
}
; block0:
; mv a5,a0
; li a0,42
; uext.b a2,a5
; uext.b a4,a0
; eq a0,a2,a4##ty=i8
; ret
function %h(i8, i8, i8) -> i8 {
block0(v0: i8, v1: i8, v2: i8):
v3 = bitselect.i8 v0, v1, v2
return v3
}
; block0:
; mv t3,a2
; and a2,a0,a1
; not a4,a0
; and a6,a4,t3
; or a0,a2,a6
; ret
function %i(b1, i8, i8) -> i8 {
block0(v0: b1, v1: i8, v2: i8):
v3 = select.i8 v0, v1, v2
return v3
}
; block0:
; select_i8 a0,a1,a2##condition=a0
; ret
function %i(i32, i8, i8) -> i8 {
block0(v0: i32, v1: i8, v2: i8):
v3 = iconst.i32 42
v4 = icmp.i32 eq v0, v3
v5 = select.i8 v4, v1, v2
return v5
}
; block0:
; li a3,42
; uext.w a5,a0
; uext.w a7,a3
; eq t4,a5,a7##ty=i32
; select_i8 a0,a1,a2##condition=t4
; ret
function %i128_select(b1, i128, i128) -> i128 {
block0(v0: b1, v1: i128, v2: i128):
v3 = select.i128 v0, v1, v2
return v3
}
; block0:
; select_i128 [a0,a1],[a1,a2],[a3,a4]##condition=a0
; ret
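The bitselect.i8 expansion above is the classic mask formulation of a per-bit `c ? x : y`. In plain Rust (illustrative only):

fn bitselect(c: u8, x: u8, y: u8) -> u8 {
    // `and a2,a0,a1` / `not a4,a0` / `and a6,a4,t3` / `or a0,a2,a6`
    (c & x) | (!c & y)
}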

View File

@@ -0,0 +1,328 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f() -> b8 {
block0:
v0 = bconst.b8 true
return v0
}
; block0:
; li a0,-1
; ret
function %f() -> b16 {
block0:
v0 = bconst.b16 false
return v0
}
; block0:
; li a0,0
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0
return v0
}
; block0:
; li a0,0
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffff
return v0
}
; block0:
; lui a0,16
; addi a0,a0,4095
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffff0000
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffff0000
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffff00000000
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffff00000000
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffff000000000000
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffff000000000000
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffffffffffffffff
return v0
}
; block0:
; li a0,-1
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffffffffffff0000
return v0
}
; block0:
; lui a0,1048560
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffffffff0000ffff
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffffffff0000ffff
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xffff0000ffffffff
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffff0000ffffffff
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0x0000ffffffffffff
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xffffffffffff
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xf34bf0a31212003a ;; random digits
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xf34bf0a31212003a
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0x12e900001ef40000 ;; random digits with 2 clear half words
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0x12e900001ef40000
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0x12e9ffff1ef4ffff ;; random digits with 2 full half words
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0x12e9ffff1ef4ffff
; ret
function %f() -> i32 {
block0:
v0 = iconst.i32 -1
return v0
}
; block0:
; li a0,-1
; ret
function %f() -> i32 {
block0:
v0 = iconst.i32 0xfffffff7
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xfffffff7
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xfffffff7
return v0
}
; block0:
; auipc a0,0
; ld a0,12(a0)
; j 12
; .8byte 0xfffffff7
; ret
function %f() -> i64 {
block0:
v0 = iconst.i64 0xfffffffffffffff7
return v0
}
; block0:
; li a0,-9
; ret
function %f() -> f64 {
block0:
v0 = f64const 0x1.0
return v0
}
; block0:
; auipc t2,0
; ld t2,12(t2)
; j 12
; .8byte 0x3ff0000000000000
; fmv.d.x fa0,t2
; ret
function %f() -> f32 {
block0:
v0 = f32const 0x5.0
return v0
}
; block0:
; lui t2,264704
; fmv.w.x fa0,t2
; ret
function %f() -> f64 {
block0:
v0 = f64const 0x32.0
return v0
}
; block0:
; auipc t2,0
; ld t2,12(t2)
; j 12
; .8byte 0x4049000000000000
; fmv.d.x fa0,t2
; ret
function %f() -> f32 {
block0:
v0 = f32const 0x32.0
return v0
}
; block0:
; lui t2,271488
; fmv.w.x fa0,t2
; ret
function %f() -> f64 {
block0:
v0 = f64const 0x0.0
return v0
}
; block0:
; li t2,0
; fmv.d.x fa0,t2
; ret
function %f() -> f32 {
block0:
v0 = f32const 0x0.0
return v0
}
; block0:
; li t2,0
; fmv.w.x fa0,t2
; ret
function %f() -> f64 {
block0:
v0 = f64const -0x10.0
return v0
}
; block0:
; auipc t2,0
; ld t2,12(t2)
; j 12
; .8byte 0xc030000000000000
; fmv.d.x fa0,t2
; ret
function %f() -> f32 {
block0:
v0 = f32const -0x10.0
return v0
}
; block0:
; auipc t2,0
; lwu t2,12(t2)
; j 8
; .4byte 0xc1800000
; fmv.w.x fa0,t2
; ret
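The constant tests above exercise three materialization strategies: a single `li`/`addi` for 12-bit signed immediates, `lui` (optionally followed by `addi`) for values that fit a sign-extended 32-bit pattern, and an inline literal pool (`auipc` + `ld` over a `.8byte`/`.4byte`) for everything else. A rough reconstruction of that choice, inferred only from the expected outputs shown here (not the backend's actual code; the name is illustrative):

fn pick_i64_const_sequence(v: i64) -> &'static str {
    if (-2048..=2047).contains(&v) {
        // e.g. 0, -1, -9: a single `li rd,imm`
        "li"
    } else if v == (v as i32) as i64 {
        // e.g. 0xffff or 0xffffffffffff0000: `lui`, plus `addi` when the
        // low 12 bits are non-zero
        "lui[+addi]"
    } else {
        // e.g. 0xffff0000 or 0xf34bf0a31212003a: load from an inline pool
        // via `auipc` + `ld` + `.8byte <value>`
        "auipc+ld"
    }
}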

View File

@@ -0,0 +1,119 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(i8) -> i64 {
block0(v0: i8):
v1 = sextend.i64 v0
v2 = iconst.i64 42
v3 = iadd.i64 v2, v1
return v3
}
; block0:
; sext.b a1,a0
; addi a0,a1,42
; ret
function %f2(i8, i64) -> i64 {
block0(v0: i8, v1: i64):
v2 = sextend.i64 v0
v3 = iadd.i64 v2, v1
return v3
}
; block0:
; sext.b a2,a0
; add a0,a2,a1
; ret
function %i128_uextend_i64(i64) -> i128 {
block0(v0: i64):
v1 = uextend.i128 v0
return v1
}
; block0:
; mv a1,zero
; ret
function %i128_sextend_i64(i64) -> i128 {
block0(v0: i64):
v1 = sextend.i128 v0
return v1
}
; block0:
; slt a1,a0,zero
; sext.b1 a1,a1
; ret
function %i128_uextend_i32(i32) -> i128 {
block0(v0: i32):
v1 = uextend.i128 v0
return v1
}
; block0:
; uext.w a0,a0
; mv a1,zero
; ret
function %i128_sextend_i32(i32) -> i128 {
block0(v0: i32):
v1 = sextend.i128 v0
return v1
}
; block0:
; sext.w a1,a0
; slt a3,a1,zero
; sext.b1 a1,a3
; ret
function %i128_uextend_i16(i16) -> i128 {
block0(v0: i16):
v1 = uextend.i128 v0
return v1
}
; block0:
; uext.h a0,a0
; mv a1,zero
; ret
function %i128_sextend_i16(i16) -> i128 {
block0(v0: i16):
v1 = sextend.i128 v0
return v1
}
; block0:
; sext.h a1,a0
; slt a3,a1,zero
; sext.b1 a1,a3
; ret
function %i128_uextend_i8(i8) -> i128 {
block0(v0: i8):
v1 = uextend.i128 v0
return v1
}
; block0:
; uext.b a0,a0
; mv a1,zero
; ret
function %i128_sextend_i8(i8) -> i128 {
block0(v0: i8):
v1 = sextend.i128 v0
return v1
}
; block0:
; sext.b a1,a0
; slt a3,a1,zero
; sext.b1 a1,a3
; ret

View File

@@ -0,0 +1,84 @@
test compile precise-output
set unwind_info=false
target riscv64
function u0:0(i8) -> f32 {
block0(v0: i8):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; fcvt.s.lu fa0,a0
; ret
function u0:0(i8) -> f64 {
block0(v0: i8):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; fcvt.d.lu fa0,a0
; ret
function u0:0(i16) -> f32 {
block0(v0: i16):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; fcvt.s.lu fa0,a0
; ret
function u0:0(i16) -> f64 {
block0(v0: i16):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; fcvt.d.lu fa0,a0
; ret
function u0:0(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; fcvt_to_uint.i8 a0,fa0##in_ty=f32 tmp=ft4
; ret
function u0:0(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; fcvt_to_uint.i8 a0,fa0##in_ty=f64 tmp=ft4
; ret
function u0:0(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; fcvt_to_uint.i16 a0,fa0##in_ty=f32 tmp=ft4
; ret
function u0:0(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; fcvt_to_uint.i16 a0,fa0##in_ty=f64 tmp=ft4
; ret

View File

@@ -0,0 +1,576 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f1(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fadd v0, v1
return v2
}
; block0:
; fadd.s fa0,fa0,fa1
; ret
function %f2(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fadd v0, v1
return v2
}
; block0:
; fadd.d fa0,fa0,fa1
; ret
function %f3(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fsub v0, v1
return v2
}
; block0:
; fsub.s fa0,fa0,fa1
; ret
function %f4(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fsub v0, v1
return v2
}
; block0:
; fsub.d fa0,fa0,fa1
; ret
function %f5(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmul v0, v1
return v2
}
; block0:
; fmul.s fa0,fa0,fa1
; ret
function %f6(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmul v0, v1
return v2
}
; block0:
; fmul.d fa0,fa0,fa1
; ret
function %f7(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fdiv v0, v1
return v2
}
; block0:
; fdiv.s fa0,fa0,fa1
; ret
function %f8(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fdiv v0, v1
return v2
}
; block0:
; fdiv.d fa0,fa0,fa1
; ret
function %f9(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmin v0, v1
return v2
}
; block0:
; fmin.s ft4,fa0,fa1##tmp=a2 ty=f32
; fmv.d fa0,ft4
; ret
function %f10(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmin v0, v1
return v2
}
; block0:
; fmin.d ft4,fa0,fa1##tmp=a2 ty=f64
; fmv.d fa0,ft4
; ret
function %f11(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmax v0, v1
return v2
}
; block0:
; fmax.s ft4,fa0,fa1##tmp=a2 ty=f32
; fmv.d fa0,ft4
; ret
function %f12(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmax v0, v1
return v2
}
; block0:
; fmax.d ft4,fa0,fa1##tmp=a2 ty=f64
; fmv.d fa0,ft4
; ret
function %f13(f32) -> f32 {
block0(v0: f32):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt.s fa0,fa0
; ret
function %f15(f64) -> f64 {
block0(v0: f64):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt.d fa0,fa0
; ret
function %f16(f32) -> f32 {
block0(v0: f32):
v1 = fabs v0
return v1
}
; block0:
; fabs.s fa0,fa0
; ret
function %f17(f64) -> f64 {
block0(v0: f64):
v1 = fabs v0
return v1
}
; block0:
; fabs.d fa0,fa0
; ret
function %f18(f32) -> f32 {
block0(v0: f32):
v1 = fneg v0
return v1
}
; block0:
; fneg.s fa0,fa0
; ret
function %f19(f64) -> f64 {
block0(v0: f64):
v1 = fneg v0
return v1
}
; block0:
; fneg.d fa0,fa0
; ret
function %f20(f32) -> f64 {
block0(v0: f32):
v1 = fpromote.f64 v0
return v1
}
; block0:
; fcvt.d.s fa0,fa0
; ret
function %f21(f64) -> f32 {
block0(v0: f64):
v1 = fdemote.f32 v0
return v1
}
; block0:
; fcvt.s.d fa0,fa0
; ret
function %f22(f32) -> f32 {
block0(v0: f32):
v1 = ceil v0
return v1
}
; block0:
; ceil ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32
; fmv.d fa0,ft3
; ret
function %f22(f64) -> f64 {
block0(v0: f64):
v1 = ceil v0
return v1
}
; block0:
; ceil ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64
; fmv.d fa0,ft3
; ret
function %f23(f32) -> f32 {
block0(v0: f32):
v1 = floor v0
return v1
}
; block0:
; floor ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32
; fmv.d fa0,ft3
; ret
function %f24(f64) -> f64 {
block0(v0: f64):
v1 = floor v0
return v1
}
; block0:
; floor ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64
; fmv.d fa0,ft3
; ret
function %f25(f32) -> f32 {
block0(v0: f32):
v1 = trunc v0
return v1
}
; block0:
; trunc ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32
; fmv.d fa0,ft3
; ret
function %f26(f64) -> f64 {
block0(v0: f64):
v1 = trunc v0
return v1
}
; block0:
; trunc ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64
; fmv.d fa0,ft3
; ret
function %f27(f32) -> f32 {
block0(v0: f32):
v1 = nearest v0
return v1
}
; block0:
; nearest ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32
; fmv.d fa0,ft3
; ret
function %f28(f64) -> f64 {
block0(v0: f64):
v1 = nearest v0
return v1
}
; block0:
; nearest ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64
; fmv.d fa0,ft3
; ret
function %f29(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmadd.s fa0,fa0,fa1,fa2
; ret
function %f30(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmadd.d fa0,fa0,fa1,fa2
; ret
function %f31(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcopysign v0, v1
return v2
}
; block0:
; fsgnj.s fa0,fa0,fa1
; ret
function %f32(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fcopysign v0, v1
return v2
}
; block0:
; fsgnj.d fa0,fa0,fa1
; ret
function %f33(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; fcvt_to_uint.i32 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f34(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; fcvt_to_sint.i32 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f35(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; fcvt_to_uint.i64 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f36(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; fcvt_to_sint.i64 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f37(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; fcvt_to_uint.i32 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f38(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; fcvt_to_sint.i32 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f39(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; fcvt_to_uint.i64 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f40(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; fcvt_to_sint.i64 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f41(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; fcvt.s.wu fa0,a0
; ret
function %f42(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; fcvt.s.w fa0,a0
; ret
function %f43(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; fcvt.s.lu fa0,a0
; ret
function %f44(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; fcvt.s.l fa0,a0
; ret
function %f45(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; fcvt.d.wu fa0,a0
; ret
function %f46(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; fcvt.d.w fa0,a0
; ret
function %f47(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; fcvt.d.lu fa0,a0
; ret
function %f48(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; fcvt.d.l fa0,a0
; ret
function %f49(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f50(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f51(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f52(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f32 tmp=ft4
; ret
function %f53(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f54(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f55(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f64 tmp=ft4
; ret
function %f56(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f64 tmp=ft4
; ret

View File

@@ -0,0 +1,53 @@
test compile precise-output
set unwind_info=false
target riscv64
function %dynamic_heap_check(i64 vmctx, i32) -> i64 {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32
block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
return v2
}
; block0:
; uext.w t3,a1
; ld t4,0(a0)
; addi t4,t4,0
; ugt t0,t3,t4##ty=i64
; beq t0,zero,taken(label1),not_taken(label2)
; block1:
; add t0,a0,t3
; ugt t3,t3,t4##ty=i64
; li t1,0
; selectif_spectre_guard a0,t1,t0##test=t3
; ret
; block2:
; udf##trap_code=heap_oob
function %static_heap_check(i64 vmctx, i32) -> i64 {
gv0 = vmctx
heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32
block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
return v2
}
; block0:
; uext.w t3,a1
; lui a7,16
; ugt t4,t3,a7##ty=i64
; beq t4,zero,taken(label1),not_taken(label2)
; block1:
; add t4,a0,t3
; lui a7,16
; ugt t0,t3,a7##ty=i64
; li t1,0
; selectif_spectre_guard a0,t1,t4##test=t0
; ret
; block2:
; udf##trap_code=heap_oob
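The dynamic heap check above does an explicit bound comparison that branches to a heap_oob trap, then repeats the comparison as a branch-free selectif_spectre_guard so a mispredicted bounds check yields a null address rather than an out-of-bounds one. A sketch of the shape of that code (illustrative only; it assumes, as in this test, that the heap base is the vmctx pointer itself):

fn dynamic_heap_addr(vmctx_base: u64, bound: u64, index: u32) -> u64 {
    let index = index as u64; // `uext.w t3,a1`
    if index > bound {
        panic!("heap_oob");   // `udf##trap_code=heap_oob`
    }
    let addr = vmctx_base.wrapping_add(index); // `add t0,a0,t3`
    // `selectif_spectre_guard`: pick 0 or the address without a branch, so a
    // speculated-past bounds check cannot expose an out-of-bounds pointer.
    if index > bound { 0 } else { addr }
}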

View File

@@ -0,0 +1,24 @@
test compile precise-output
set unwind_info=false
target riscv64
function u0:0() -> i8 system_v {
block0:
v0 = iconst.i16 0xddcc
v1 = icmp.i16 ne v0, v0
v2 = bint.i8 v1
return v2
}
; block0:
; lui t2,14
; addi t2,t2,3532
; lui a2,14
; addi a2,a2,3532
; uext.h a5,t2
; uext.h a7,a2
; ne t4,a5,a7##ty=i16
; andi a0,t4,1
; ret

View File

@@ -0,0 +1,17 @@
test compile precise-output
set unwind_info=false
target riscv64
;; Test default (non-SpiderMonkey) ABI.
function %f() -> i64, i64 {
block1:
v0 = iconst.i64 1
v1 = iconst.i64 2
return v0, v1
}
; block0:
; li a0,1
; li a1,2
; ret

View File

@@ -0,0 +1,58 @@
test compile precise-output
set unwind_info=false
target riscv64
function %add8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = iadd.i8 v0, v1
return v2
}
; block0:
; addw a0,a0,a1
; ret
function %add16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = iadd.i16 v0, v1
return v2
}
; block0:
; addw a0,a0,a1
; ret
function %add32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = iadd.i32 v0, v1
return v2
}
; block0:
; addw a0,a0,a1
; ret
function %add32_8(i32, i8) -> i32 {
block0(v0: i32, v1: i8):
v2 = sextend.i32 v1
v3 = iadd.i32 v0, v2
return v3
}
; block0:
; sext.b a2,a1
; addw a0,a0,a2
; ret
function %add64_32(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v0, v2
return v3
}
; block0:
; sext.w a2,a1
; add a0,a0,a2
; ret

View File

@@ -0,0 +1,279 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(f64) -> f64 {
block0(v0: f64):
v1 = fadd.f64 v0, v0
v2 = fadd.f64 v0, v0
v3 = fadd.f64 v0, v0
v4 = fadd.f64 v0, v0
v5 = fadd.f64 v0, v0
v6 = fadd.f64 v0, v0
v7 = fadd.f64 v0, v0
v8 = fadd.f64 v0, v0
v9 = fadd.f64 v0, v0
v10 = fadd.f64 v0, v0
v11 = fadd.f64 v0, v0
v12 = fadd.f64 v0, v0
v13 = fadd.f64 v0, v0
v14 = fadd.f64 v0, v0
v15 = fadd.f64 v0, v0
v16 = fadd.f64 v0, v0
v17 = fadd.f64 v0, v0
v18 = fadd.f64 v0, v0
v19 = fadd.f64 v0, v0
v20 = fadd.f64 v0, v0
v21 = fadd.f64 v0, v0
v22 = fadd.f64 v0, v0
v23 = fadd.f64 v0, v0
v24 = fadd.f64 v0, v0
v25 = fadd.f64 v0, v0
v26 = fadd.f64 v0, v0
v27 = fadd.f64 v0, v0
v28 = fadd.f64 v0, v0
v29 = fadd.f64 v0, v0
v30 = fadd.f64 v0, v0
v31 = fadd.f64 v0, v0
v32 = fadd.f64 v0, v1
v33 = fadd.f64 v2, v3
v34 = fadd.f64 v4, v5
v35 = fadd.f64 v6, v7
v36 = fadd.f64 v8, v9
v37 = fadd.f64 v10, v11
v38 = fadd.f64 v12, v13
v39 = fadd.f64 v14, v15
v40 = fadd.f64 v16, v17
v41 = fadd.f64 v18, v19
v42 = fadd.f64 v20, v21
v43 = fadd.f64 v22, v23
v44 = fadd.f64 v24, v25
v45 = fadd.f64 v26, v27
v46 = fadd.f64 v28, v29
v47 = fadd.f64 v30, v31
v48 = fadd.f64 v32, v33
v49 = fadd.f64 v34, v35
v50 = fadd.f64 v36, v37
v51 = fadd.f64 v38, v39
v52 = fadd.f64 v40, v41
v53 = fadd.f64 v42, v43
v54 = fadd.f64 v44, v45
v55 = fadd.f64 v46, v47
v56 = fadd.f64 v48, v49
v57 = fadd.f64 v50, v51
v58 = fadd.f64 v52, v53
v59 = fadd.f64 v54, v55
v60 = fadd.f64 v56, v57
v61 = fadd.f64 v58, v59
v62 = fadd.f64 v60, v61
return v62
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; fsd fs0,-8(sp)
; fsd fs2,-16(sp)
; fsd fs3,-24(sp)
; fsd fs4,-32(sp)
; fsd fs5,-40(sp)
; fsd fs6,-48(sp)
; fsd fs7,-56(sp)
; fsd fs8,-64(sp)
; fsd fs9,-72(sp)
; fsd fs10,-80(sp)
; fsd fs11,-88(sp)
; add sp,-96
; block0:
; fadd.d ft4,fa0,fa0
; fadd.d ft5,fa0,fa0
; fadd.d ft6,fa0,fa0
; fadd.d ft7,fa0,fa0
; fadd.d fa1,fa0,fa0
; fadd.d fa2,fa0,fa0
; fadd.d fa3,fa0,fa0
; fadd.d fa4,fa0,fa0
; fadd.d fa5,fa0,fa0
; fadd.d fa6,fa0,fa0
; fadd.d fa7,fa0,fa0
; fadd.d ft8,fa0,fa0
; fadd.d ft9,fa0,fa0
; fadd.d ft10,fa0,fa0
; fadd.d ft11,fa0,fa0
; fadd.d ft0,fa0,fa0
; fadd.d ft1,fa0,fa0
; fadd.d ft2,fa0,fa0
; fadd.d ft3,fa0,fa0
; fadd.d fs4,fa0,fa0
; fadd.d fs5,fa0,fa0
; fadd.d fs6,fa0,fa0
; fadd.d fs7,fa0,fa0
; fadd.d fs8,fa0,fa0
; fadd.d fs9,fa0,fa0
; fadd.d fs10,fa0,fa0
; fadd.d fs11,fa0,fa0
; fadd.d fs0,fa0,fa0
; fadd.d fs1,fa0,fa0
; fadd.d fs2,fa0,fa0
; fadd.d fs3,fa0,fa0
; fadd.d ft4,fa0,ft4
; fadd.d ft5,ft5,ft6
; fadd.d ft6,ft7,fa1
; fadd.d ft7,fa2,fa3
; fadd.d fa0,fa4,fa5
; fadd.d fa1,fa6,fa7
; fadd.d fa2,ft8,ft9
; fadd.d fa3,ft10,ft11
; fadd.d fa4,ft0,ft1
; fadd.d fa5,ft2,ft3
; fadd.d fa6,fs4,fs5
; fadd.d fa7,fs6,fs7
; fadd.d ft8,fs8,fs9
; fadd.d ft9,fs10,fs11
; fadd.d ft10,fs0,fs1
; fadd.d ft11,fs2,fs3
; fadd.d ft4,ft4,ft5
; fadd.d ft5,ft6,ft7
; fadd.d ft6,fa0,fa1
; fadd.d ft7,fa2,fa3
; fadd.d fa0,fa4,fa5
; fadd.d fa1,fa6,fa7
; fadd.d fa2,ft8,ft9
; fadd.d fa3,ft10,ft11
; fadd.d ft4,ft4,ft5
; fadd.d ft5,ft6,ft7
; fadd.d ft6,fa0,fa1
; fadd.d ft7,fa2,fa3
; fadd.d ft4,ft4,ft5
; fadd.d ft5,ft6,ft7
; fadd.d fa0,ft4,ft5
; add sp,+96
; fld fs0,-8(sp)
; fld fs2,-16(sp)
; fld fs3,-24(sp)
; fld fs4,-32(sp)
; fld fs5,-40(sp)
; fld fs6,-48(sp)
; fld fs7,-56(sp)
; fld fs8,-64(sp)
; fld fs9,-72(sp)
; fld fs10,-80(sp)
; fld fs11,-88(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %f2(i64) -> i64 {
block0(v0: i64):
v1 = iadd.i64 v0, v0
v2 = iadd.i64 v0, v1
v3 = iadd.i64 v0, v2
v4 = iadd.i64 v0, v3
v5 = iadd.i64 v0, v4
v6 = iadd.i64 v0, v5
v7 = iadd.i64 v0, v6
v8 = iadd.i64 v0, v7
v9 = iadd.i64 v0, v8
v10 = iadd.i64 v0, v9
v11 = iadd.i64 v0, v10
v12 = iadd.i64 v0, v11
v13 = iadd.i64 v0, v12
v14 = iadd.i64 v0, v13
v15 = iadd.i64 v0, v14
v16 = iadd.i64 v0, v15
v17 = iadd.i64 v0, v16
v18 = iadd.i64 v0, v17
v19 = iadd.i64 v0, v1
v20 = iadd.i64 v2, v3
v21 = iadd.i64 v4, v5
v22 = iadd.i64 v6, v7
v23 = iadd.i64 v8, v9
v24 = iadd.i64 v10, v11
v25 = iadd.i64 v12, v13
v26 = iadd.i64 v14, v15
v27 = iadd.i64 v16, v17
v28 = iadd.i64 v18, v19
v29 = iadd.i64 v20, v21
v30 = iadd.i64 v22, v23
v31 = iadd.i64 v24, v25
v32 = iadd.i64 v26, v27
v33 = iadd.i64 v28, v29
v34 = iadd.i64 v30, v31
v35 = iadd.i64 v32, v33
v36 = iadd.i64 v34, v35
return v36
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s6,-8(sp)
; sd s7,-16(sp)
; sd s8,-24(sp)
; sd s9,-32(sp)
; sd s10,-40(sp)
; sd s11,-48(sp)
; add sp,-48
; block0:
; add t4,a0,a0
; add t0,a0,t4
; add t1,a0,t0
; add t2,a0,t1
; add a1,a0,t2
; add a2,a0,a1
; add a3,a0,a2
; add a4,a0,a3
; add a5,a0,a4
; add a6,a0,a5
; add a7,a0,a6
; add t3,a0,a7
; add s6,a0,t3
; add s7,a0,s6
; add s8,a0,s7
; add s9,a0,s8
; add s10,a0,s9
; add s11,a0,s10
; add t4,a0,t4
; add t0,t0,t1
; add t1,t2,a1
; add t2,a2,a3
; add a0,a4,a5
; add a1,a6,a7
; add a2,t3,s6
; add a3,s7,s8
; add a4,s9,s10
; add t4,s11,t4
; add t0,t0,t1
; add t1,t2,a0
; add t2,a1,a2
; add a0,a3,a4
; add t4,t4,t0
; add t0,t1,t2
; add t4,a0,t4
; add a0,t0,t4
; add sp,+48
; ld s6,-8(sp)
; ld s7,-16(sp)
; ld s8,-24(sp)
; ld s9,-32(sp)
; ld s10,-40(sp)
; ld s11,-48(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret

View File

@@ -0,0 +1,40 @@
test compile precise-output
set unwind_info=false
target riscv64
function %ireduce_128_64(i128) -> i64 {
block0(v0: i128):
v1 = ireduce.i64 v0
return v1
}
; block0:
; ret
function %ireduce_128_32(i128) -> i32 {
block0(v0: i128):
v1 = ireduce.i32 v0
return v1
}
; block0:
; ret
function %ireduce_128_16(i128) -> i16 {
block0(v0: i128):
v1 = ireduce.i16 v0
return v1
}
; block0:
; ret
function %ireduce_128_8(i128) -> i8 {
block0(v0: i128):
v1 = ireduce.i8 v0
return v1
}
; block0:
; ret

View File

@@ -0,0 +1,103 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f0(r64) -> r64 {
block0(v0: r64):
return v0
}
; block0:
; ret
function %f1(r64) -> b1 {
block0(v0: r64):
v1 = is_null v0
return v1
}
; block0:
; is_null a0,a0
; ret
function %f2(r64) -> b1 {
block0(v0: r64):
v1 = is_invalid v0
return v1
}
; block0:
; is_invalid a0,a0
; ret
function %f3() -> r64 {
block0:
v0 = null.r64
return v0
}
; block0:
; li a0,0
; ret
function %f4(r64, r64) -> r64, r64, r64 {
fn0 = %f(r64) -> b1
ss0 = explicit_slot 8
block0(v0: r64, v1: r64):
v2 = call fn0(v0)
stack_store.r64 v0, ss0
brz v2, block1(v1, v0)
jump block2(v0, v1)
block1(v3: r64, v4: r64):
jump block3(v3, v4)
block2(v5: r64, v6: r64):
jump block3(v5, v6)
block3(v7: r64, v8: r64):
v9 = stack_load.r64 ss0
return v7, v8, v9
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s9,-8(sp)
; add sp,-48
; block0:
; sd a0,8(nominal_sp)
; sd a1,16(nominal_sp)
; mv s9,a2
; load_sym a3,%f+0
; callind a3
; load_addr a2,nsp+0
; ld t1,8(nominal_sp)
; sd t1,0(a2)
; beq a0,zero,taken(label1),not_taken(label3)
; block1:
; j label2
; block2:
; mv a1,t1
; ld a0,16(nominal_sp)
; j label5
; block3:
; j label4
; block4:
; mv a0,t1
; ld a1,16(nominal_sp)
; j label5
; block5:
; load_addr a4,nsp+0
; ld a4,0(a4)
; mv a2,s9
; sd a4,0(a2)
; add sp,+48
; ld s9,-8(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret

View File

@@ -0,0 +1,28 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = ishl.i64 v0, v1
v3 = iadd.i64 v0, v2
return v3
}
; block0:
; slli a1,a0,3
; add a0,a0,a1
; ret
function %f(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 53
v2 = ishl.i32 v0, v1
return v2
}
; block0:
; slliw a0,a0,53
; ret

View File

@@ -0,0 +1,451 @@
test compile precise-output
set unwind_info=false
target riscv64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ROR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %i128_rotr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotr.i128 v0, v1
return v2
}
; block0:
; andi a4,a2,127
; li a6,128
; sub a6,a6,a4
; srl t4,a0,a4
; sll t1,a1,a6
; select_reg a2,zero,t1##condition=(a4 eq zero)
; or a2,t4,a2
; srl a5,a1,a4
; sll a6,a0,a6
; select_reg t3,zero,a6##condition=(a4 eq zero)
; or t0,a5,t3
; li t2,64
; select_reg a0,t0,a2##condition=(a4 uge t2)
; select_reg a1,a2,t0##condition=(a4 uge t2)
; ret
function %f0(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotr.i64 v0, v1
return v2
}
; block0:
; andi a1,a1,63
; li a3,64
; sub a3,a3,a1
; srl a6,a0,a1
; sll t3,a0,a3
; select_reg t0,zero,t3##condition=(a1 eq zero)
; or a0,a6,t0
; ret
function %f1(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = rotr.i32 v0, v1
return v2
}
; block0:
; uext.w a2,a0
; andi a3,a1,31
; li a5,32
; sub a5,a5,a3
; srl t3,a2,a3
; sll t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotr.i16 v0, v1
return v2
}
; block0:
; uext.h a2,a0
; andi a3,a1,15
; li a5,16
; sub a5,a5,a3
; srl t3,a2,a3
; sll t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %f3(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotr.i8 v0, v1
return v2
}
; block0:
; uext.b a2,a0
; andi a3,a1,7
; li a5,8
; sub a5,a5,a3
; srl t3,a2,a3
; sll t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %i128_rotl(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotl.i128 v0, v1
return v2
}
; block0:
; andi a4,a2,127
; li a6,128
; sub a6,a6,a4
; sll t4,a0,a4
; srl t1,a1,a6
; select_reg a2,zero,t1##condition=(a4 eq zero)
; or a2,t4,a2
; sll a5,a1,a4
; srl a6,a0,a6
; select_reg t3,zero,a6##condition=(a4 eq zero)
; or t0,a5,t3
; li t2,64
; select_reg a0,t0,a2##condition=(a4 uge t2)
; select_reg a1,a2,t0##condition=(a4 uge t2)
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotl.i64 v0, v1
return v2
}
; block0:
; andi a1,a1,63
; li a3,64
; sub a3,a3,a1
; sll a6,a0,a1
; srl t3,a0,a3
; select_reg t0,zero,t3##condition=(a1 eq zero)
; or a0,a6,t0
; ret
function %f5(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = rotl.i32 v0, v1
return v2
}
; block0:
; uext.w a2,a0
; andi a3,a1,31
; li a5,32
; sub a5,a5,a3
; sll t3,a2,a3
; srl t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %f6(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotl.i16 v0, v1
return v2
}
; block0:
; uext.h a2,a0
; andi a3,a1,15
; li a5,16
; sub a5,a5,a3
; sll t3,a2,a3
; srl t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %f7(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotl.i8 v0, v1
return v2
}
; block0:
; uext.b a2,a0
; andi a3,a1,7
; li a5,8
; sub a5,a5,a3
; sll t3,a2,a3
; srl t0,a2,a5
; select_reg t2,zero,t0##condition=(a3 eq zero)
; or a0,t3,t2
; ret
function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ushr.i64 v0, v1
return v2
}
; block0:
; srl a0,a0,a1
; ret
function %f9(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr.i32 v0, v1
return v2
}
; block0:
; srlw a0,a0,a1
; ret
function %f10(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = ushr.i16 v0, v1
return v2
}
; block0:
; mv a5,a1
; uext.h a1,a0
; andi a3,a5,15
; srlw a0,a1,a3
; ret
function %f11(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = ushr.i8 v0, v1
return v2
}
; block0:
; mv a5,a1
; uext.b a1,a0
; andi a3,a5,7
; srlw a0,a1,a3
; ret
function %f12(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl.i64 v0, v1
return v2
}
; block0:
; sll a0,a0,a1
; ret
function %f13(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl.i32 v0, v1
return v2
}
; block0:
; sllw a0,a0,a1
; ret
function %f14(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = ishl.i16 v0, v1
return v2
}
; block0:
; andi a1,a1,15
; sllw a0,a0,a1
; ret
function %f15(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = ishl.i8 v0, v1
return v2
}
; block0:
; andi a1,a1,7
; sllw a0,a0,a1
; ret
function %f16(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sshr.i64 v0, v1
return v2
}
; block0:
; sra a0,a0,a1
; ret
function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sshr.i32 v0, v1
return v2
}
; block0:
; sraw a0,a0,a1
; ret
function %f18(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = sshr.i16 v0, v1
return v2
}
; block0:
; mv a5,a1
; sext.h a1,a0
; andi a3,a5,15
; sra a0,a1,a3
; ret
function %f19(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = sshr.i8 v0, v1
return v2
}
; block0:
; mv a5,a1
; sext.b a1,a0
; andi a3,a5,7
; sra a0,a1,a3
; ret
function %f20(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 17
v2 = rotr.i64 v0, v1
return v2
}
; block0:
; li a1,17
; andi a2,a1,63
; li a4,64
; sub a4,a4,a2
; srl a7,a0,a2
; sll t4,a0,a4
; select_reg t1,zero,t4##condition=(a2 eq zero)
; or a0,a7,t1
; ret
function %f21(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 17
v2 = rotl.i64 v0, v1
return v2
}
; block0:
; li a1,17
; andi a2,a1,63
; li a4,64
; sub a4,a4,a2
; sll a7,a0,a2
; srl t4,a0,a4
; select_reg t1,zero,t4##condition=(a2 eq zero)
; or a0,a7,t1
; ret
function %f22(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 17
v2 = rotl.i32 v0, v1
return v2
}
; block0:
; mv t4,a0
; li a0,17
; uext.w a2,t4
; andi a4,a0,31
; li a6,32
; sub a6,a6,a4
; sll t4,a2,a4
; srl t1,a2,a6
; select_reg a0,zero,t1##condition=(a4 eq zero)
; or a0,t4,a0
; ret
function %f23(i16) -> i16 {
block0(v0: i16):
v1 = iconst.i32 10
v2 = rotl.i16 v0, v1
return v2
}
; block0:
; mv t4,a0
; li a0,10
; uext.h a2,t4
; andi a4,a0,15
; li a6,16
; sub a6,a6,a4
; sll t4,a2,a4
; srl t1,a2,a6
; select_reg a0,zero,t1##condition=(a4 eq zero)
; or a0,t4,a0
; ret
function %f24(i8) -> i8 {
block0(v0: i8):
v1 = iconst.i32 3
v2 = rotl.i8 v0, v1
return v2
}
; block0:
; mv t4,a0
; li a0,3
; uext.b a2,t4
; andi a4,a0,7
; li a6,8
; sub a6,a6,a4
; sll t4,a2,a4
; srl t1,a2,a6
; select_reg a0,zero,t1##condition=(a4 eq zero)
; or a0,t4,a0
; ret
function %f25(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 17
v2 = ushr.i64 v0, v1
return v2
}
; block0:
; srli a0,a0,17
; ret
function %f26(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 17
v2 = sshr.i64 v0, v1
return v2
}
; block0:
; srai a0,a0,17
; ret
function %f27(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 17
v2 = ishl.i64 v0, v1
return v2
}
; block0:
; slli a0,a0,17
; ret

View File

@@ -0,0 +1,206 @@
test compile precise-output
set unwind_info=false
target riscv64
function %foo() {
block0:
return
}
; block0:
; ret
function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
return
}
; block0:
; ret
function %stack_limit_gv_leaf_zero(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
block0(v0: i64):
return
}
; block0:
; ret
function %stack_limit_call_zero(i64 stack_limit) {
fn0 = %foo()
block0(v0: i64):
call fn0()
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; trap_ifc stk_ovf##(sp ult a0)
; block0:
; load_sym t2,%foo+0
; callind t2
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_limit_gv_call_zero(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
fn0 = %foo()
block0(v0: i64):
call fn0()
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; trap_ifc stk_ovf##(sp ult t6)
; block0:
; load_sym t2,%foo+0
; callind t2
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_limit(i64 stack_limit) {
ss0 = explicit_slot 168
block0(v0: i64):
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; andi t6,a0,176
; trap_ifc stk_ovf##(sp ult t6)
; add sp,-176
; block0:
; add sp,+176
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %huge_stack_limit(i64 stack_limit) {
ss0 = explicit_slot 400000
block0(v0: i64):
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; trap_ifc stk_ovf##(sp ult a0)
; lui t5,98
; addi t5,t5,2688
; add t6,t5,a0
; trap_ifc stk_ovf##(sp ult t6)
; lui a0,98
; addi a0,a0,2688
; call %Probestack
; add sp,-400000
; block0:
; add sp,+400000
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %limit_preamble(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
ss0 = explicit_slot 20
block0(v0: i64):
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; andi t6,t6,32
; trap_ifc stk_ovf##(sp ult t6)
; add sp,-32
; block0:
; add sp,+32
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %limit_preamble_huge(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0
gv2 = load.i64 notrap aligned gv1+4
stack_limit = gv2
ss0 = explicit_slot 400000
block0(v0: i64):
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; trap_ifc stk_ovf##(sp ult t6)
; lui t5,98
; addi t5,t5,2688
; add t6,t5,t6
; trap_ifc stk_ovf##(sp ult t6)
; lui a0,98
; addi a0,a0,2688
; call %Probestack
; add sp,-400000
; block0:
; add sp,+400000
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %limit_preamble_huge_offset(i64 vmctx) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+400000
stack_limit = gv1
ss0 = explicit_slot 20
block0(v0: i64):
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; ld t6,400000(a0)
; andi t6,t6,32
; trap_ifc stk_ovf##(sp ult t6)
; add sp,-32
; block0:
; add sp,+32
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret

View File

@@ -0,0 +1,630 @@
test compile precise-output
set unwind_info=false
target riscv64
function %stack_addr_small() -> i64 {
ss0 = explicit_slot 8
block0:
v0 = stack_addr.i64 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-16
; block0:
; load_addr a0,nsp+0
; add sp,+16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_addr_big() -> i64 {
ss0 = explicit_slot 100000
ss1 = explicit_slot 8
block0:
v0 = stack_addr.i64 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui a0,24
; addi a0,a0,1712
; call %Probestack
; add sp,-100016
; block0:
; load_addr a0,nsp+0
; add sp,+100016
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_load_small() -> i64 {
ss0 = explicit_slot 8
block0:
v0 = stack_load.i64 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-16
; block0:
; load_addr t2,nsp+0
; ld a0,0(t2)
; add sp,+16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_load_big() -> i64 {
ss0 = explicit_slot 100000
ss1 = explicit_slot 8
block0:
v0 = stack_load.i64 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui a0,24
; addi a0,a0,1712
; call %Probestack
; add sp,-100016
; block0:
; load_addr t2,nsp+0
; ld a0,0(t2)
; add sp,+100016
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_store_small(i64) {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-16
; block0:
; load_addr t2,nsp+0
; sd a0,0(t2)
; add sp,+16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %stack_store_big(i64) {
ss0 = explicit_slot 100000
ss1 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui a0,24
; addi a0,a0,1712
; call %Probestack
; add sp,-100016
; block0:
; load_addr t2,nsp+0
; sd a0,0(t2)
; add sp,+100016
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %b1_spill_slot(b1) -> b1, i64 {
ss0 = explicit_slot 1000
block0(v0: b1):
v1 = iconst.i64 1
v2 = iconst.i64 2
v3 = iconst.i64 3
v4 = iconst.i64 4
v5 = iconst.i64 5
v6 = iconst.i64 6
v7 = iconst.i64 7
v8 = iconst.i64 8
v9 = iconst.i64 9
v10 = iconst.i64 10
v11 = iconst.i64 11
v12 = iconst.i64 12
v13 = iconst.i64 13
v14 = iconst.i64 14
v15 = iconst.i64 15
v16 = iconst.i64 16
v17 = iconst.i64 17
v18 = iconst.i64 18
v19 = iconst.i64 19
v20 = iconst.i64 20
v21 = iconst.i64 21
v22 = iconst.i64 22
v23 = iconst.i64 23
v24 = iconst.i64 24
v25 = iconst.i64 25
v26 = iconst.i64 26
v27 = iconst.i64 27
v28 = iconst.i64 28
v29 = iconst.i64 29
v30 = iconst.i64 30
v31 = iconst.i64 31
v32 = iconst.i64 32
v33 = iconst.i64 33
v34 = iconst.i64 34
v35 = iconst.i64 35
v36 = iconst.i64 36
v37 = iconst.i64 37
v38 = iconst.i64 38
v39 = iconst.i64 39
v40 = iconst.i64 30
v41 = iconst.i64 31
v42 = iconst.i64 32
v43 = iconst.i64 33
v44 = iconst.i64 34
v45 = iconst.i64 35
v46 = iconst.i64 36
v47 = iconst.i64 37
v48 = iconst.i64 38
v49 = iconst.i64 39
v50 = iconst.i64 30
v51 = iconst.i64 31
v52 = iconst.i64 32
v53 = iconst.i64 33
v54 = iconst.i64 34
v55 = iconst.i64 35
v56 = iconst.i64 36
v57 = iconst.i64 37
v58 = iconst.i64 38
v59 = iconst.i64 39
v60 = iconst.i64 30
v61 = iconst.i64 31
v62 = iconst.i64 32
v63 = iconst.i64 33
v64 = iconst.i64 34
v65 = iconst.i64 35
v66 = iconst.i64 36
v67 = iconst.i64 37
v68 = iconst.i64 38
v69 = iconst.i64 39
v70 = iadd.i64 v1, v2
v71 = iadd.i64 v3, v4
v72 = iadd.i64 v5, v6
v73 = iadd.i64 v7, v8
v74 = iadd.i64 v9, v10
v75 = iadd.i64 v11, v12
v76 = iadd.i64 v13, v14
v77 = iadd.i64 v15, v16
v78 = iadd.i64 v17, v18
v79 = iadd.i64 v19, v20
v80 = iadd.i64 v21, v22
v81 = iadd.i64 v23, v24
v82 = iadd.i64 v25, v26
v83 = iadd.i64 v27, v28
v84 = iadd.i64 v29, v30
v85 = iadd.i64 v31, v32
v86 = iadd.i64 v33, v34
v87 = iadd.i64 v35, v36
v88 = iadd.i64 v37, v38
v89 = iadd.i64 v39, v40
v90 = iadd.i64 v41, v42
v91 = iadd.i64 v43, v44
v92 = iadd.i64 v45, v46
v93 = iadd.i64 v47, v48
v94 = iadd.i64 v49, v50
v95 = iadd.i64 v51, v52
v96 = iadd.i64 v53, v54
v97 = iadd.i64 v55, v56
v98 = iadd.i64 v57, v58
v99 = iadd.i64 v59, v60
v100 = iadd.i64 v61, v62
v101 = iadd.i64 v63, v64
v102 = iadd.i64 v65, v66
v103 = iadd.i64 v67, v68
v104 = iadd.i64 v69, v70
v105 = iadd.i64 v71, v72
v106 = iadd.i64 v73, v74
v107 = iadd.i64 v75, v76
v108 = iadd.i64 v77, v78
v109 = iadd.i64 v79, v80
v110 = iadd.i64 v81, v82
v111 = iadd.i64 v83, v84
v112 = iadd.i64 v85, v86
v113 = iadd.i64 v87, v88
v114 = iadd.i64 v89, v90
v115 = iadd.i64 v91, v92
v116 = iadd.i64 v93, v94
v117 = iadd.i64 v95, v96
v118 = iadd.i64 v97, v98
v119 = iadd.i64 v99, v100
v120 = iadd.i64 v101, v102
v121 = iadd.i64 v103, v104
v122 = iadd.i64 v105, v106
v123 = iadd.i64 v107, v108
v124 = iadd.i64 v109, v110
v125 = iadd.i64 v111, v112
v126 = iadd.i64 v113, v114
v127 = iadd.i64 v115, v116
v128 = iadd.i64 v117, v118
v129 = iadd.i64 v119, v120
v130 = iadd.i64 v121, v122
v131 = iadd.i64 v123, v124
v132 = iadd.i64 v125, v126
v133 = iadd.i64 v127, v128
v134 = iadd.i64 v129, v130
v135 = iadd.i64 v131, v132
v136 = iadd.i64 v133, v134
v137 = iadd.i64 v135, v136
return v0, v137
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s1,-8(sp)
; sd s2,-16(sp)
; sd s3,-24(sp)
; sd s4,-32(sp)
; sd s5,-40(sp)
; sd s6,-48(sp)
; sd s7,-56(sp)
; sd s8,-64(sp)
; sd s9,-72(sp)
; sd s10,-80(sp)
; sd s11,-88(sp)
; add sp,-1280
; block0:
; sd a0,1000(nominal_sp)
; li t0,2
; addi a1,t0,1
; sd a1,1176(nominal_sp)
; li t0,4
; addi a2,t0,3
; sd a2,1168(nominal_sp)
; li t0,6
; addi a3,t0,5
; sd a3,1160(nominal_sp)
; li t0,8
; addi a4,t0,7
; sd a4,1152(nominal_sp)
; li t0,10
; addi a5,t0,9
; sd a5,1144(nominal_sp)
; li t0,12
; addi a6,t0,11
; sd a6,1136(nominal_sp)
; li t0,14
; addi a7,t0,13
; sd a7,1128(nominal_sp)
; li t0,16
; addi t3,t0,15
; sd t3,1120(nominal_sp)
; li t0,18
; addi t4,t0,17
; sd t4,1112(nominal_sp)
; li t0,20
; addi t0,t0,19
; sd t0,1104(nominal_sp)
; li t0,22
; addi t1,t0,21
; sd t1,1096(nominal_sp)
; li t0,24
; addi s8,t0,23
; sd s8,1088(nominal_sp)
; li t0,26
; addi s9,t0,25
; sd s9,1080(nominal_sp)
; li t0,28
; addi s10,t0,27
; sd s10,1072(nominal_sp)
; li t0,30
; addi s11,t0,29
; sd s11,1064(nominal_sp)
; li t0,32
; addi s1,t0,31
; sd s1,1056(nominal_sp)
; li t0,34
; addi s2,t0,33
; sd s2,1048(nominal_sp)
; li t0,36
; addi s3,t0,35
; sd s3,1040(nominal_sp)
; li t0,38
; addi s4,t0,37
; sd s4,1032(nominal_sp)
; li t0,30
; addi s5,t0,39
; sd s5,1024(nominal_sp)
; li t0,32
; addi s6,t0,31
; sd s6,1016(nominal_sp)
; li t0,34
; addi s7,t0,33
; sd s7,1008(nominal_sp)
; li t0,36
; addi s7,t0,35
; li t0,38
; addi a0,t0,37
; li t0,30
; addi t2,t0,39
; li t0,32
; addi a1,t0,31
; li t0,34
; addi a2,t0,33
; li t0,36
; addi a3,t0,35
; li t0,38
; addi a4,t0,37
; li t0,30
; addi a5,t0,39
; li t0,32
; addi a6,t0,31
; li t0,34
; addi a7,t0,33
; li t0,36
; addi t3,t0,35
; li t0,38
; addi t4,t0,37
; ld t0,1176(nominal_sp)
; addi t0,t0,39
; ld t1,1160(nominal_sp)
; ld s4,1168(nominal_sp)
; add t1,s4,t1
; ld s11,1144(nominal_sp)
; ld s9,1152(nominal_sp)
; add s8,s9,s11
; ld s5,1128(nominal_sp)
; ld s3,1136(nominal_sp)
; add s9,s3,s5
; ld s10,1112(nominal_sp)
; ld s11,1120(nominal_sp)
; add s10,s11,s10
; ld s4,1096(nominal_sp)
; ld s2,1104(nominal_sp)
; add s11,s2,s4
; ld s1,1080(nominal_sp)
; ld s2,1088(nominal_sp)
; add s1,s2,s1
; ld s3,1064(nominal_sp)
; ld s2,1072(nominal_sp)
; add s2,s2,s3
; ld s3,1048(nominal_sp)
; ld s6,1056(nominal_sp)
; add s3,s6,s3
; ld s4,1032(nominal_sp)
; ld s5,1040(nominal_sp)
; add s4,s5,s4
; ld s6,1016(nominal_sp)
; ld s5,1024(nominal_sp)
; add s5,s5,s6
; ld s6,1008(nominal_sp)
; add s7,s6,s7
; add t2,a0,t2
; add a0,a1,a2
; add a1,a3,a4
; add a2,a5,a6
; add a3,a7,t3
; add a4,t4,t0
; add t1,t1,s8
; add a5,s9,s10
; add a6,s11,s1
; add a7,s2,s3
; add t3,s4,s5
; add t2,s7,t2
; add a0,a0,a1
; add a1,a2,a3
; add t1,a4,t1
; add a2,a5,a6
; add a3,a7,t3
; add t2,t2,a0
; add t1,a1,t1
; add a0,a2,a3
; add t1,t2,t1
; add a1,a0,t1
; ld a0,1000(nominal_sp)
; add sp,+1280
; ld s1,-8(sp)
; ld s2,-16(sp)
; ld s3,-24(sp)
; ld s4,-32(sp)
; ld s5,-40(sp)
; ld s6,-48(sp)
; ld s7,-56(sp)
; ld s8,-64(sp)
; ld s9,-72(sp)
; ld s10,-80(sp)
; ld s11,-88(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_store(i128) {
ss0 = explicit_slot 16
block0(v0: i128):
stack_store.i128 v0, ss0
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-16
; block0:
; mv a2,a0
; load_addr a0,nsp+0
; sd a2,0(a0)
; sd a1,8(a0)
; add sp,+16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_store_inst_offset(i128) {
ss0 = explicit_slot 16
ss1 = explicit_slot 16
block0(v0: i128):
stack_store.i128 v0, ss1+16
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-32
; block0:
; mv a2,a0
; load_addr a0,nsp+32
; sd a2,0(a0)
; sd a1,8(a0)
; add sp,+32
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_store_big(i128) {
ss0 = explicit_slot 100000
ss1 = explicit_slot 8
block0(v0: i128):
stack_store.i128 v0, ss0
return
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui a0,24
; addi a0,a0,1712
; call %Probestack
; add sp,-100016
; block0:
; mv a2,a0
; load_addr a0,nsp+0
; sd a2,0(a0)
; sd a1,8(a0)
; add sp,+100016
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_load() -> i128 {
ss0 = explicit_slot 16
block0:
v0 = stack_load.i128 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-16
; block0:
; load_addr a1,nsp+0
; ld a0,0(a1)
; ld a1,8(a1)
; add sp,+16
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_load_inst_offset() -> i128 {
ss0 = explicit_slot 16
ss1 = explicit_slot 16
block0:
v0 = stack_load.i128 ss1+16
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; add sp,-32
; block0:
; load_addr a1,nsp+32
; ld a0,0(a1)
; ld a1,8(a1)
; add sp,+32
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
function %i128_stack_load_big() -> i128 {
ss0 = explicit_slot 100000
ss1 = explicit_slot 8
block0:
v0 = stack_load.i128 ss0
return v0
}
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui a0,24
; addi a0,a0,1712
; call %Probestack
; add sp,-100016
; block0:
; load_addr a1,nsp+0
; ld a0,0(a1)
; ld a1,8(a1)
; add sp,+100016
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret

View File

@@ -0,0 +1,16 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f() -> i64 {
gv0 = symbol %my_global
block0:
v0 = symbol_value.i64 gv0
return v0
}
; block0:
; load_sym a0,%my_global+0
; ret

View File

@@ -0,0 +1,36 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f() {
block0:
trap user0
}
; block0:
; udf##trap_code=user0
function %g(i64) {
block0(v0: i64):
v1 = iconst.i64 42
v2 = ifcmp v0, v1
trapif eq v2, user0
return
}
; block0:
; li t2,42
; eq a1,a0,t2##ty=i64
; trap_if a1,user0
; ret
function %h() {
block0:
debugtrap
return
}
; block0:
; ebreak
; ret

View File

@@ -0,0 +1,124 @@
test compile precise-output
set unwind_info=false
target riscv64
function %f_u_8_64(i8) -> i64 {
block0(v0: i8):
v1 = uextend.i64 v0
return v1
}
; block0:
; uext.b a0,a0
; ret
function %f_u_8_32(i8) -> i32 {
block0(v0: i8):
v1 = uextend.i32 v0
return v1
}
; block0:
; uext.b a0,a0
; ret
function %f_u_8_16(i8) -> i16 {
block0(v0: i8):
v1 = uextend.i16 v0
return v1
}
; block0:
; uext.b a0,a0
; ret
function %f_s_8_64(i8) -> i64 {
block0(v0: i8):
v1 = sextend.i64 v0
return v1
}
; block0:
; sext.b a0,a0
; ret
function %f_s_8_32(i8) -> i32 {
block0(v0: i8):
v1 = sextend.i32 v0
return v1
}
; block0:
; sext.b a0,a0
; ret
function %f_s_8_16(i8) -> i16 {
block0(v0: i8):
v1 = sextend.i16 v0
return v1
}
; block0:
; sext.b a0,a0
; ret
function %f_u_16_64(i16) -> i64 {
block0(v0: i16):
v1 = uextend.i64 v0
return v1
}
; block0:
; uext.h a0,a0
; ret
function %f_u_16_32(i16) -> i32 {
block0(v0: i16):
v1 = uextend.i32 v0
return v1
}
; block0:
; uext.h a0,a0
; ret
function %f_s_16_64(i16) -> i64 {
block0(v0: i16):
v1 = sextend.i64 v0
return v1
}
; block0:
; sext.h a0,a0
; ret
function %f_s_16_32(i16) -> i32 {
block0(v0: i16):
v1 = sextend.i32 v0
return v1
}
; block0:
; sext.h a0,a0
; ret
function %f_u_32_64(i32) -> i64 {
block0(v0: i32):
v1 = uextend.i64 v0
return v1
}
; block0:
; uext.w a0,a0
; ret
function %f_s_32_64(i32) -> i64 {
block0(v0: i32):
v1 = sextend.i64 v0
return v1
}
; block0:
; sext.w a0,a0
; ret

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %alias(i8) -> i8 {
block0(v0: i8):

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64 has_m
function %add_i64(i64, i64) -> i64 {
block0(v0: i64,v1: i64):

View File

@@ -3,6 +3,7 @@ target s390x
target aarch64
target aarch64 has_lse
target x86_64
target riscv64
; We can't test that these instructions are right regarding atomicity, but we can
; test if they perform their operation correctly
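The comment above states the strategy shared by all of the atomic run tests touched in this commit: only the value produced by each operation is checked, since the filetest harness cannot observe atomicity. As a rough sketch of the shape such a check takes (this snippet is illustrative and not part of the diff), an atomic read-modify-write can be verified by storing a known value, applying the operation, and reloading the slot:

function %atomic_rmw_add_i64(i64, i64) -> i64 {
    ss0 = explicit_slot 8

block0(v0: i64, v1: i64):
    ; seed the slot with the first argument
    stack_store.i64 v0, ss0
    v2 = stack_addr.i64 ss0
    ; atomically add the second argument; v3 holds the old value (unused here)
    v3 = atomic_rmw.i64 add v2, v1
    ; reload and return the updated contents
    v4 = stack_load.i64 ss0
    return v4
}
; run: %atomic_rmw_add_i64(10, 5) == 15

A single-threaded run test can only confirm the arithmetic effect; whether the riscv64 lowering (using the A extension where `has_a` is requested) is actually atomic is outside what these tests can show.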

View File

@@ -2,7 +2,8 @@ test run
target aarch64
target aarch64 has_lse
target x86_64
target s390x
target riscv64 has_a
; We can't test that these instructions are right regarding atomicity, but we can
; test if they perform their operation correctly

View File

@@ -4,6 +4,7 @@ target s390x has_mie2
target aarch64
target aarch64 has_lse
target x86_64
target riscv64 has_a
; We can't test that these instructions are right regarding atomicity, but we can
; test if they perform their operation correctly

View File

@@ -4,6 +4,7 @@ target s390x has_mie2
target aarch64
target aarch64 has_lse
target x86_64
target riscv64
; We can't test that these instructions are right regarding atomicity, but we can
; test if they perform their operation correctly

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target x86_64
target s390x
target riscv64
function %bextend_b1_b8(b1) -> b8 {
block0(v0: b1):

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %bint_b1_i8_true() -> i8 {
block0:

View File

@@ -1,6 +1,7 @@
test run
target aarch64
target s390x
target riscv64
target s390x has_mie2
; target x86_64 TODO: Not yet implemented on x86_64

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %bitrev_i8(i8) -> i8 {
block0(v0: i8):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target aarch64
target s390x
target riscv64
function %bmask_b64_i64(b64) -> i64 {
block0(v0: b64):

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %jump() -> b1 {
block0:

View File

@@ -3,7 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %bricmp_eq_i64(i64, i64) -> b1 {
block0(v0: i64, v1: i64):

View File

@@ -4,6 +4,7 @@ target aarch64
target aarch64 use_bti
target x86_64
target s390x
target riscv64
function %br_table_i32(i32) -> i32 {
jt0 = jump_table [block1, block2, block2, block3]
@@ -38,4 +39,4 @@ block5(v5: i32):
; run: %br_table_i32(4) == 8
; run: %br_table_i32(5) == 9
; run: %br_table_i32(6) == 10
; run: %br_table_i32(-1) == 3

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target x86_64
target s390x
target riscv64
function %breduce_b8_b1(b8) -> b1 {
block0(v0: b8):

View File

@@ -4,6 +4,7 @@ target x86_64
target x86_64 has_sse41=false
target aarch64
target s390x
target riscv64
function %ceil_f32(f32) -> f32 {
block0(v0: f32):

View File

@@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target riscv64
target s390x
; not implemented on `x86_64`

View File

@@ -4,6 +4,7 @@ target aarch64
target s390x
target x86_64
target x86_64 has_lzcnt
target riscv64
function %clz_i8(i8) -> i8 {
block0(v0: i8):

View File

@@ -2,6 +2,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %i8_iconst_0() -> i8 {
block0:

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
function %fcvt_to_sint(f32) -> i32 {
block0(v0: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target s390x
target aarch64
;; target riscv64 vector type not supported.
function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 {
gv0 = vmctx
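The commented-out riscv64 target above presumably reflects that later functions in this file use vector types, which the new backend does not support yet; the scalar f32-to-f64 promotion itself is expected to lower fine (other scalar float run tests in this commit do enable riscv64). Purely as an illustration, and not a line from this diff, a scalar-only check would look like:

function %fpromote_scalar(f32) -> f64 {
block0(v0: f32):
    ; widen the f32 argument to f64
    v1 = fpromote.f64 v0
    return v1
}
; run: %fpromote_scalar(0x1.5p0) == 0x1.5p0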

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
target x86_64 has_bmi1
function %ctz_i8(i8) -> i8 {

View File

@@ -3,6 +3,8 @@ set avoid_div_traps=false
target aarch64
target s390x
target x86_64
target riscv64
; Tests that the `avoid_div_traps` flag prevents a trap when {s,u}rem is called
; with INT_MIN % -1.
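For context, the behavior pinned down by this file is that a signed remainder of INT_MIN by -1 is defined to yield 0 rather than trap, even though the corresponding signed division overflows. A hypothetical check of that shape (illustrative only, not taken from this diff) looks like:

function %srem_int_min(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    ; signed remainder; INT_MIN % -1 must produce 0, not a trap
    v2 = srem v0, v1
    return v2
}
; run: %srem_int_min(-2147483648, -1) == 0

Backends whose divide instructions trap on this input therefore need the generated code to special-case `srem` rather than letting the hardware fault.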

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target s390x
target x86_64
target riscv64
;;;; basic uextend

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target x86_64
target s390x
target riscv64
function %fabs_f32(f32) -> f32 {
block0(v0: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fadd_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_eq_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_ge_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_gt_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_le_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_lt_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fcmp_ne_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_one_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_ord_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_ueq_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_uge_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_ugt_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_ule_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_ult_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -2,6 +2,8 @@ test interpret
test run
target x86_64
target s390x
target riscv64
function %fcmp_uno_f32(f32, f32) -> b1 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target aarch64
target x86_64
target s390x
target riscv64
function %fcopysign_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fdiv_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

View File

@@ -4,6 +4,7 @@ target x86_64
target x86_64 has_sse41=false
target aarch64
target s390x
target riscv64
function %floor_f32(f32) -> f32 {
block0(v0: f32):

View File

@@ -4,6 +4,7 @@ target aarch64
target s390x
target x86_64 has_avx has_fma
target x86_64 has_avx=false has_fma=false
target riscv64
function %fma_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
@@ -148,4 +149,4 @@ block0(v0: f32, v1: f32, v2: f32):
v4 = fma v0, v1, v3
return v4
}
; run: %fma_load_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6

View File

@@ -2,6 +2,7 @@ test interpret
test run
target x86_64
target aarch64
target riscv64
; target s390x FIXME: This currently fails under qemu due to a qemu bug
function %fmax_p_f32(f32, f32) -> f32 {

View File

@@ -3,6 +3,7 @@ test run
target x86_64
target aarch64
target s390x
target riscv64
function %fmax_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):

Some files were not shown because too many files have changed in this diff.