Initial ISLE integration with the x64 backend
On the build side, this commit introduces two things:
1. The automatic generation of various ISLE definitions for working with
CLIF. Specifically, it generates extern type definitions for clif opcodes and
the clif instruction data `enum`, as well as extractors for matching each clif
instruction. This happens inside the `cranelift-codegen-meta` crate.
2. The compilation of ISLE DSL sources to Rust code that can be included in the
main `cranelift-codegen` compilation.
Next, this commit introduces the integration glue code required to get
ISLE-generated Rust code hooked up in clif-to-x64 lowering. When lowering a clif
instruction, we first try to use the ISLE code path. If it succeeds, then we are
done lowering this instruction. If it fails, then we proceed along the existing
hand-written code path for lowering.
Finally, this commit ports many lowering rules over from hand-written,
open-coded Rust to ISLE.
In the process of supporting ISLE, this commit also makes the x64 `Inst` capable
of expressing SSA by supporting 3-operand forms for all of the existing
instructions that only have a 2-operand form encoding:
dst = src1 op src2
Rather than only the typical x86-64 2-operand form:
dst = dst op src
This allows `MachInst` to be in SSA form, since `dst` and `src1` are
disentangled.
("3-operand" and "2-operand" are a little bit of a misnomer since not all
operations are binary operations, but we do the same thing for, e.g., unary
operations by disentangling the sole operand from the result.)
There are two motivations for this change:
1. To allow ISLE lowering code to have value-equivalence semantics. We want ISLE
lowering to translate a CLIF expression that evaluates to some value into a
`MachInst` expression that evaluates to the same value. We want both the
lowering itself and the resulting `MachInst` to be pure and referentially
transparent. This is both a nice paradigm for compiler writers that are
authoring and maintaining lowering rules and is a prerequisite to any sort of
formal verification of our lowering rules in the future.
2. Better align `MachInst` with `regalloc2`'s API, which requires that the input
be in SSA form.
This commit is contained in:
7
.gitattributes
vendored
7
.gitattributes
vendored
@@ -5,3 +5,10 @@
|
||||
*.png binary
|
||||
*.ico binary
|
||||
*.wasm binary
|
||||
|
||||
# ISLE should use lisp syntax highlighting.
|
||||
*.isle linguist-language=lisp
|
||||
|
||||
# Tell GitHub this is generated code, and doesn't need to be shown in diffs by
|
||||
# default.
|
||||
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs linguist-generated
|
||||
|
||||
15
.github/workflows/main.yml
vendored
15
.github/workflows/main.yml
vendored
@@ -189,6 +189,21 @@ jobs:
|
||||
working-directory: ./fuzz
|
||||
- run: cargo fuzz build --dev
|
||||
|
||||
rebuild_isle:
|
||||
name: Rebuild ISLE
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- uses: ./.github/actions/install-rust
|
||||
- name: Rebuild ISLE DSL files
|
||||
run: cargo build -p cranelift-codegen --features "rebuild-isle"
|
||||
- name: Reformat
|
||||
run: cargo fmt -p cranelift-codegen
|
||||
- name: Check that the ISLE DSL files are up-to-date
|
||||
run: git diff --exit-code
|
||||
|
||||
rebuild_peephole_optimizers:
|
||||
name: Rebuild Peephole Optimizers
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -552,7 +552,9 @@ dependencies = [
|
||||
"criterion",
|
||||
"gimli",
|
||||
"hashbrown",
|
||||
"isle",
|
||||
"log",
|
||||
"miette",
|
||||
"peepmatic",
|
||||
"peepmatic-runtime",
|
||||
"peepmatic-traits",
|
||||
|
||||
@@ -39,6 +39,8 @@ criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.78.0" }
|
||||
isle = { path = "../isle/isle", version = "0.1.0", optional = true }
|
||||
miette = { version = "3", features = ["fancy"] }
|
||||
|
||||
[features]
|
||||
default = ["std", "unwind"]
|
||||
@@ -98,6 +100,9 @@ enable-peepmatic = ["peepmatic-runtime", "peepmatic-traits", "serde"]
|
||||
# Enable support for the Souper harvester.
|
||||
souper-harvest = ["souper-ir", "souper-ir/stringify"]
|
||||
|
||||
# Recompile ISLE DSL source files into their generated Rust code.
|
||||
rebuild-isle = ["isle", "cranelift-codegen-meta/rebuild-isle"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
|
||||
@@ -46,9 +46,12 @@ fn main() {
|
||||
isa_targets
|
||||
};
|
||||
|
||||
let cur_dir = env::current_dir().expect("Can't access current working directory");
|
||||
let crate_dir = cur_dir.as_path();
|
||||
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
||||
if let Err(err) = meta::generate(&isas, &out_dir) {
|
||||
if let Err(err) = meta::generate(&isas, &out_dir, crate_dir) {
|
||||
eprintln!("Error: {}", err);
|
||||
process::exit(1);
|
||||
}
|
||||
@@ -74,6 +77,19 @@ fn main() {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
{
|
||||
if let Err(e) = rebuild_isle(crate_dir) {
|
||||
eprintln!("Error building ISLE files: {:?}", e);
|
||||
let mut source = e.source();
|
||||
while let Some(e) = source {
|
||||
eprintln!("{:?}", e);
|
||||
source = e.source();
|
||||
}
|
||||
std::process::abort();
|
||||
}
|
||||
}
|
||||
|
||||
let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
|
||||
let mut cmd = std::process::Command::new("git");
|
||||
cmd.arg("rev-parse")
|
||||
@@ -110,3 +126,101 @@ fn main() {
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Compile the ISLE DSL sources of each compilation unit into Rust and write
/// the result next to the hand-written backend code.
///
/// `crate_dir` is the root of the `cranelift-codegen` crate; all input and
/// output paths are derived from it.
///
/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since
/// it consumes files generated by them.
///
/// # Errors
///
/// Fails if the current directory cannot be determined, if lexing, parsing or
/// compiling the ISLE sources fails, or if the generated file cannot be
/// written.
#[cfg(feature = "rebuild-isle")]
fn rebuild_isle(crate_dir: &std::path::Path) -> Result<(), Box<dyn std::error::Error + 'static>> {
    use std::fmt;
    use std::sync::Once;

    /// Error adapter around a `miette::Report` whose `Display` and `Debug`
    /// output both go through the report's handler, so that the source
    /// snippets and location info are included along with the error message.
    struct DebugReport(miette::Report);

    impl fmt::Display for DebugReport {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            self.0.handler().debug(&*self.0, f)
        }
    }

    impl fmt::Debug for DebugReport {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            fmt::Display::fmt(self, f)
        }
    }

    impl std::error::Error for DebugReport {}

    // Install the `miette` reporting hook at most once.
    static SET_MIETTE_HOOK: Once = Once::new();
    SET_MIETTE_HOOK.call_once(|| {
        let _ = miette::set_hook(Box::new(|_| {
            Box::new(
                miette::MietteHandlerOpts::new()
                    // `miette` mistakenly uses braille-optimized output for emacs's
                    // `M-x shell`.
                    .force_graphical(true)
                    .build(),
            )
        }));
    });

    let src_dir = crate_dir.join("src");
    let x64_dir = src_dir.join("isa").join("x64");

    // This is a set of ISLE compilation units.
    //
    // The format of each entry is:
    //
    //     (output Rust code file, input ISLE source files)
    //
    // There should be one entry for each backend that uses ISLE for lowering,
    // and if/when we replace our peephole optimization passes with ISLE, there
    // should be an entry for each of those as well.
    let isle_compilations = vec![
        // The x86-64 instruction selector.
        (
            x64_dir.join("lower").join("isle").join("generated_code.rs"),
            vec![
                src_dir.join("clif.isle"),
                src_dir.join("prelude.isle"),
                x64_dir.join("inst.isle"),
                x64_dir.join("lower.isle"),
            ],
        ),
    ];

    let working_dir = std::env::current_dir()?;
    for (output_path, mut inputs) in isle_compilations {
        for input in &mut inputs {
            println!("cargo:rerun-if-changed={}", input.display());

            // Strip the current directory from the file paths, because `islec`
            // includes them in the generated source, and this helps us maintain
            // deterministic builds that don't include those local file paths.
            let relative = input
                .strip_prefix(&working_dir)
                .ok()
                .map(|suffix| suffix.to_path_buf());
            if let Some(relative) = relative {
                *input = relative;
            }
        }

        // Lex, parse and compile in one fallible step so that any failure is
        // wrapped in a `DebugReport` by the single `map_err` below.
        let run_pipeline = || {
            let lexer = isle::lexer::Lexer::from_files(inputs)?;
            let defs = isle::parser::parse(lexer)?;
            isle::compile::compile(&defs)
        };
        let generated = run_pipeline().map_err(|e| DebugReport(miette::Report::new(e)))?;

        println!(
            "Writing ISLE-generated Rust code to {}",
            output_path.display()
        );
        std::fs::write(output_path, generated)?;
    }

    Ok(())
}
|
||||
|
||||
@@ -17,3 +17,6 @@ cranelift-codegen-shared = { path = "../shared", version = "0.78.0" }
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[features]
|
||||
rebuild-isle = []
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
//! Generate instruction data (including opcodes, formats, builders, etc.).
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use cranelift_codegen_shared::constant_hash;
|
||||
|
||||
@@ -1084,6 +1085,243 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo
|
||||
fmtln!(fmt, "}")
|
||||
}
|
||||
|
||||
/// Generate the ISLE source that describes Cranelift's instruction data to
/// the ISLE compiler.
///
/// The text is appended to `fmt`; the caller decides where it is written.
/// Five sections are emitted, in this order:
///
/// 1. primitive type declarations for every immediate type used by `formats`,
/// 2. `ValueArrayN` types plus their constructor/extractor for every
///    fixed-size value array arity used by `formats`,
/// 3. the extern `Opcode` enum, one variant per entry in `instructions`,
/// 4. the extern `InstructionData` enum, one variant per entry in `formats`,
/// 5. one helper extractor per instruction that matches its opcode, value
///    operands and immediates out of an `InstructionData`.
///
/// Within each `InstructionData` variant the fields are declared as
/// `opcode`, then the value operand(s), then the immediates. The extractor
/// patterns in section 5 are positional and therefore list their
/// sub-patterns in exactly that order.
#[cfg(feature = "rebuild-isle")]
fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: &mut Formatter) {
    use std::collections::BTreeSet;
    use std::fmt::Write;

    fmt.multi_line(
        r#"
;; GENERATED BY `gen_isle`. DO NOT EDIT!!!
;;
;; This ISLE file defines all the external type declarations for Cranelift's
;; data structures that ISLE will process, such as `InstructionData` and
;; `Opcode`.
        "#,
    );
    fmt.empty_line();

    // Generate all the extern type declarations we need for various immediates.
    fmt.line(";;;; Extern type declarations for immediates ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    // Only the last path segment of the Rust type is used as the ISLE type
    // name; the `BTreeSet` de-duplicates and gives a stable output order.
    let imm_tys: BTreeSet<_> = formats
        .iter()
        .flat_map(|f| {
            f.imm_fields
                .iter()
                .map(|i| i.kind.rust_type.rsplit("::").next().unwrap())
                .collect::<Vec<_>>()
        })
        .collect();
    for ty in imm_tys {
        fmtln!(fmt, "(type {} (primitive {}))", ty, ty);
    }
    fmt.empty_line();

    // Generate all of the value arrays we need for `InstructionData` as well as
    // the constructors and extractors for them.
    fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    // Formats with a value list or a single value operand do not use a
    // `ValueArrayN`, so they are filtered out here.
    let value_array_arities: BTreeSet<_> = formats
        .iter()
        .filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1)
        .map(|f| f.num_value_operands)
        .collect();
    for n in value_array_arities {
        fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
        fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
        fmt.empty_line();

        fmtln!(
            fmt,
            "(decl value_array_{} ({}) ValueArray{})",
            n,
            (0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
            n
        );
        fmtln!(
            fmt,
            "(extern constructor value_array_{} pack_value_array_{})",
            n,
            n
        );
        fmtln!(
            fmt,
            "(extern extractor infallible value_array_{} unpack_value_array_{})",
            n,
            n
        );
        fmt.empty_line();
    }

    // Generate the extern type declaration for `Opcode`.
    fmt.line(";;;; `Opcode` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    fmt.line("(type Opcode extern");
    fmt.indent(|fmt| {
        fmt.line("(enum");
        fmt.indent(|fmt| {
            for inst in instructions {
                fmtln!(fmt, "{}", inst.camel_name);
            }
        });
        fmt.line(")");
    });
    fmt.line(")");
    fmt.empty_line();

    // Generate the extern type declaration for `InstructionData`.
    //
    // Field order per variant: opcode, value operand(s), immediates. The
    // extractor patterns generated further down must use the same order.
    fmt.line(";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    fmt.line("(type InstructionData extern");
    fmt.indent(|fmt| {
        fmt.line("(enum");
        fmt.indent(|fmt| {
            for format in formats {
                let mut s = format!("({} (opcode Opcode)", format.name);
                if format.typevar_operand.is_some() {
                    if format.has_value_list {
                        s.push_str(" (args ValueList)");
                    } else if format.num_value_operands == 1 {
                        s.push_str(" (arg Value)");
                    } else {
                        write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
                    }
                }
                for field in &format.imm_fields {
                    write!(
                        &mut s,
                        " ({} {})",
                        field.member,
                        field.kind.rust_type.rsplit("::").next().unwrap()
                    )
                    .unwrap();
                }
                s.push(')');
                fmt.line(&s);
            }
        });
        fmt.line(")");
    });
    fmt.line(")");
    fmt.empty_line();

    // Generate the helper extractors for each opcode's full instruction.
    //
    // TODO: if/when we port our peephole optimization passes to ISLE we will
    // want helper constructors as well.
    fmt.line(";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;");
    fmt.empty_line();
    for inst in instructions {
        fmtln!(
            fmt,
            "(decl {} ({}) Inst)",
            inst.name,
            inst.operands_in
                .iter()
                .map(|o| {
                    let ty = o.kind.rust_type;
                    if ty == "&[Value]" {
                        "ValueSlice"
                    } else {
                        ty.rsplit("::").next().unwrap()
                    }
                })
                .collect::<Vec<_>>()
                .join(" ")
        );
        fmtln!(fmt, "(extractor");
        fmt.indent(|fmt| {
            fmtln!(
                fmt,
                "({} {})",
                inst.name,
                inst.operands_in
                    .iter()
                    .map(|o| o.name)
                    .collect::<Vec<_>>()
                    .join(" ")
            );
            let mut s = format!(
                "(inst_data (InstructionData.{} (Opcode.{})",
                inst.format.name, inst.camel_name
            );

            // Value and varargs operands.
            //
            // These come directly after the opcode because that is where the
            // `InstructionData` variant declares its `arg`/`args` field; the
            // variant pattern is matched positionally.
            if inst.format.typevar_operand.is_some() {
                if inst.format.has_value_list {
                    // The instruction format uses a value list, but the
                    // instruction itself might have not only a `&[Value]`
                    // varargs operand, but also one or more `Value` operands as
                    // well. If this is the case, then we need to read them off
                    // the front of the `ValueList`.
                    let values: Vec<_> = inst
                        .operands_in
                        .iter()
                        .filter(|o| o.is_value())
                        .map(|o| o.name)
                        .collect();
                    let varargs = inst
                        .operands_in
                        .iter()
                        .find(|o| o.is_varargs())
                        .unwrap()
                        .name;
                    if values.is_empty() {
                        write!(&mut s, " (value_list_slice {})", varargs).unwrap();
                    } else {
                        write!(
                            &mut s,
                            " (unwrap_head_value_list_{} {} {})",
                            values.len(),
                            values.join(" "),
                            varargs
                        )
                        .unwrap();
                    }
                } else if inst.format.num_value_operands == 1 {
                    write!(
                        &mut s,
                        " {}",
                        inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
                    )
                    .unwrap();
                } else {
                    let values = inst
                        .operands_in
                        .iter()
                        .filter(|o| o.is_value())
                        .map(|o| o.name)
                        .collect::<Vec<_>>();
                    assert_eq!(values.len(), inst.format.num_value_operands);
                    let values = values.join(" ");
                    write!(
                        &mut s,
                        " (value_array_{} {})",
                        inst.format.num_value_operands, values,
                    )
                    .unwrap();
                }
            }

            // Immediates.
            //
            // These follow the value operands, mirroring the trailing
            // immediate fields of the `InstructionData` variant.
            let imm_operands: Vec<_> = inst
                .operands_in
                .iter()
                .filter(|o| !o.is_value() && !o.is_varargs())
                .collect();
            assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
            for op in imm_operands {
                write!(&mut s, " {}", op.name).unwrap();
            }

            s.push_str("))");
            fmt.line(&s);
        });
        fmt.line(")");
        fmt.empty_line();
    }
}
|
||||
|
||||
/// Generate a Builder trait with methods for all instructions.
|
||||
fn gen_builder(
|
||||
instructions: &AllInstructions,
|
||||
@@ -1128,7 +1366,9 @@ pub(crate) fn generate(
|
||||
all_inst: &AllInstructions,
|
||||
opcode_filename: &str,
|
||||
inst_builder_filename: &str,
|
||||
isle_filename: &str,
|
||||
out_dir: &str,
|
||||
crate_dir: &Path,
|
||||
) -> Result<(), error::Error> {
|
||||
// Opcodes.
|
||||
let mut fmt = Formatter::new();
|
||||
@@ -1144,6 +1384,20 @@ pub(crate) fn generate(
|
||||
gen_try_from(all_inst, &mut fmt);
|
||||
fmt.update_file(opcode_filename, out_dir)?;
|
||||
|
||||
// ISLE DSL.
|
||||
#[cfg(feature = "rebuild-isle")]
|
||||
{
|
||||
let mut fmt = Formatter::new();
|
||||
gen_isle(&formats, all_inst, &mut fmt);
|
||||
let crate_src_dir = crate_dir.join("src");
|
||||
fmt.update_file(isle_filename, &crate_src_dir.display().to_string())?;
|
||||
}
|
||||
#[cfg(not(feature = "rebuild-isle"))]
|
||||
{
|
||||
// Silence unused variable warnings.
|
||||
let _ = (isle_filename, crate_dir);
|
||||
}
|
||||
|
||||
// Instruction builder.
|
||||
let mut fmt = Formatter::new();
|
||||
gen_builder(all_inst, &formats, &mut fmt);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
//! This crate generates Rust sources for use by
|
||||
//! [`cranelift_codegen`](../cranelift_codegen/index.html).
|
||||
|
||||
use std::path::Path;
|
||||
#[macro_use]
|
||||
mod cdsl;
|
||||
mod srcgen;
|
||||
@@ -21,7 +23,7 @@ pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
|
||||
}
|
||||
|
||||
/// Generates all the Rust source files used in Cranelift from the meta-language.
|
||||
pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
pub fn generate(isas: &[isa::Isa], out_dir: &str, crate_dir: &Path) -> Result<(), error::Error> {
|
||||
// Create all the definitions:
|
||||
// - common definitions.
|
||||
let mut shared_defs = shared::define();
|
||||
@@ -46,7 +48,9 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
&shared_defs.all_instructions,
|
||||
"opcodes.rs",
|
||||
"inst_builder.rs",
|
||||
"clif.isle",
|
||||
&out_dir,
|
||||
crate_dir,
|
||||
)?;
|
||||
|
||||
for isa in target_isas {
|
||||
|
||||
@@ -100,6 +100,7 @@ impl Formatter {
|
||||
let path_str = format!("{}/{}", directory, filename.as_ref());
|
||||
|
||||
let path = path::Path::new(&path_str);
|
||||
println!("Writing generated file: {}", path.display());
|
||||
let mut f = fs::File::create(path)?;
|
||||
|
||||
for l in self.lines.iter().map(|l| l.as_bytes()) {
|
||||
|
||||
1635
cranelift/codegen/src/clif.isle
Normal file
1635
cranelift/codegen/src/clif.isle
Normal file
File diff suppressed because it is too large
Load Diff
933
cranelift/codegen/src/isa/x64/inst.isle
Normal file
933
cranelift/codegen/src/isa/x64/inst.isle
Normal file
@@ -0,0 +1,933 @@
|
||||
;; Extern type definitions and constructors for the x64 `MachInst` type.
|
||||
|
||||
;;;; `MInst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type MInst extern
|
||||
(enum (Nop (len u8))
|
||||
(AluRmiR (size OperandSize)
|
||||
(op AluRmiROpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMemImm)
|
||||
(dst WritableReg))
|
||||
(XmmRmR (op SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmUnaryRmR (op SseOpcode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmRmiReg (opcode SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMemImm)
|
||||
(dst WritableReg))
|
||||
(XmmRmRImm (op SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMem)
|
||||
(dst WritableReg)
|
||||
(imm u8)
|
||||
(size OperandSize))
|
||||
(CmpRmiR (size OperandSize)
|
||||
(opcode CmpOpcode)
|
||||
(src RegMemImm)
|
||||
(dst Reg))
|
||||
(Imm (dst_size OperandSize)
|
||||
(simm64 u64)
|
||||
(dst WritableReg))
|
||||
(ShiftR (size OperandSize)
|
||||
(kind ShiftKind)
|
||||
(src Reg)
|
||||
(num_bits Imm8Reg)
|
||||
(dst WritableReg))
|
||||
(MovzxRmR (ext_mode ExtMode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(MovsxRmR (ext_mode ExtMode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(Cmove (size OperandSize)
|
||||
(cc CC)
|
||||
(consequent RegMem)
|
||||
(alternative Reg)
|
||||
(dst WritableReg))
|
||||
(XmmRmREvex (op Avx512Opcode)
|
||||
(src1 RegMem)
|
||||
(src2 Reg)
|
||||
(dst WritableReg))))
|
||||
|
||||
(type OperandSize extern
|
||||
(enum Size8
|
||||
Size16
|
||||
Size32
|
||||
Size64))
|
||||
|
||||
;; Get the `OperandSize` for a given `Type`.
|
||||
(decl operand_size_of_type (Type) OperandSize)
|
||||
(extern constructor operand_size_of_type operand_size_of_type)
|
||||
|
||||
;; Get the bit width of an `OperandSize`.
|
||||
(decl operand_size_bits (OperandSize) u16)
|
||||
(rule (operand_size_bits (OperandSize.Size8)) 8)
|
||||
(rule (operand_size_bits (OperandSize.Size16)) 16)
|
||||
(rule (operand_size_bits (OperandSize.Size32)) 32)
|
||||
(rule (operand_size_bits (OperandSize.Size64)) 64)
|
||||
|
||||
(type AluRmiROpcode extern
|
||||
(enum Add
|
||||
Adc
|
||||
Sub
|
||||
Sbb
|
||||
And
|
||||
Or
|
||||
Xor
|
||||
Mul
|
||||
And8
|
||||
Or8))
|
||||
|
||||
(type SseOpcode extern
|
||||
(enum Addps
|
||||
Addpd
|
||||
Addss
|
||||
Addsd
|
||||
Andps
|
||||
Andpd
|
||||
Andnps
|
||||
Andnpd
|
||||
Blendvpd
|
||||
Blendvps
|
||||
Comiss
|
||||
Comisd
|
||||
Cmpps
|
||||
Cmppd
|
||||
Cmpss
|
||||
Cmpsd
|
||||
Cvtdq2ps
|
||||
Cvtdq2pd
|
||||
Cvtpd2ps
|
||||
Cvtps2pd
|
||||
Cvtsd2ss
|
||||
Cvtsd2si
|
||||
Cvtsi2ss
|
||||
Cvtsi2sd
|
||||
Cvtss2si
|
||||
Cvtss2sd
|
||||
Cvttpd2dq
|
||||
Cvttps2dq
|
||||
Cvttss2si
|
||||
Cvttsd2si
|
||||
Divps
|
||||
Divpd
|
||||
Divss
|
||||
Divsd
|
||||
Insertps
|
||||
Maxps
|
||||
Maxpd
|
||||
Maxss
|
||||
Maxsd
|
||||
Minps
|
||||
Minpd
|
||||
Minss
|
||||
Minsd
|
||||
Movaps
|
||||
Movapd
|
||||
Movd
|
||||
Movdqa
|
||||
Movdqu
|
||||
Movlhps
|
||||
Movmskps
|
||||
Movmskpd
|
||||
Movq
|
||||
Movss
|
||||
Movsd
|
||||
Movups
|
||||
Movupd
|
||||
Mulps
|
||||
Mulpd
|
||||
Mulss
|
||||
Mulsd
|
||||
Orps
|
||||
Orpd
|
||||
Pabsb
|
||||
Pabsw
|
||||
Pabsd
|
||||
Packssdw
|
||||
Packsswb
|
||||
Packusdw
|
||||
Packuswb
|
||||
Paddb
|
||||
Paddd
|
||||
Paddq
|
||||
Paddw
|
||||
Paddsb
|
||||
Paddsw
|
||||
Paddusb
|
||||
Paddusw
|
||||
Palignr
|
||||
Pand
|
||||
Pandn
|
||||
Pavgb
|
||||
Pavgw
|
||||
Pblendvb
|
||||
Pcmpeqb
|
||||
Pcmpeqw
|
||||
Pcmpeqd
|
||||
Pcmpeqq
|
||||
Pcmpgtb
|
||||
Pcmpgtw
|
||||
Pcmpgtd
|
||||
Pcmpgtq
|
||||
Pextrb
|
||||
Pextrw
|
||||
Pextrd
|
||||
Pinsrb
|
||||
Pinsrw
|
||||
Pinsrd
|
||||
Pmaddubsw
|
||||
Pmaddwd
|
||||
Pmaxsb
|
||||
Pmaxsw
|
||||
Pmaxsd
|
||||
Pmaxub
|
||||
Pmaxuw
|
||||
Pmaxud
|
||||
Pminsb
|
||||
Pminsw
|
||||
Pminsd
|
||||
Pminub
|
||||
Pminuw
|
||||
Pminud
|
||||
Pmovmskb
|
||||
Pmovsxbd
|
||||
Pmovsxbw
|
||||
Pmovsxbq
|
||||
Pmovsxwd
|
||||
Pmovsxwq
|
||||
Pmovsxdq
|
||||
Pmovzxbd
|
||||
Pmovzxbw
|
||||
Pmovzxbq
|
||||
Pmovzxwd
|
||||
Pmovzxwq
|
||||
Pmovzxdq
|
||||
Pmuldq
|
||||
Pmulhw
|
||||
Pmulhuw
|
||||
Pmulhrsw
|
||||
Pmulld
|
||||
Pmullw
|
||||
Pmuludq
|
||||
Por
|
||||
Pshufb
|
||||
Pshufd
|
||||
Psllw
|
||||
Pslld
|
||||
Psllq
|
||||
Psraw
|
||||
Psrad
|
||||
Psrlw
|
||||
Psrld
|
||||
Psrlq
|
||||
Psubb
|
||||
Psubd
|
||||
Psubq
|
||||
Psubw
|
||||
Psubsb
|
||||
Psubsw
|
||||
Psubusb
|
||||
Psubusw
|
||||
Ptest
|
||||
Punpckhbw
|
||||
Punpckhwd
|
||||
Punpcklbw
|
||||
Punpcklwd
|
||||
Pxor
|
||||
Rcpss
|
||||
Roundps
|
||||
Roundpd
|
||||
Roundss
|
||||
Roundsd
|
||||
Rsqrtss
|
||||
Shufps
|
||||
Sqrtps
|
||||
Sqrtpd
|
||||
Sqrtss
|
||||
Sqrtsd
|
||||
Subps
|
||||
Subpd
|
||||
Subss
|
||||
Subsd
|
||||
Ucomiss
|
||||
Ucomisd
|
||||
Unpcklps
|
||||
Xorps
|
||||
Xorpd))
|
||||
|
||||
(type CmpOpcode extern
|
||||
(enum Cmp
|
||||
Test))
|
||||
|
||||
(type RegMemImm extern
|
||||
(enum
|
||||
(Reg (reg Reg))
|
||||
(Mem (addr SyntheticAmode))
|
||||
(Imm (simm32 u32))))
|
||||
|
||||
(type RegMem extern
|
||||
(enum
|
||||
(Reg (reg Reg))
|
||||
(Mem (addr SyntheticAmode))))
|
||||
|
||||
;; Put the given clif value into a `RegMem` operand.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg_mem (Value) RegMem)
|
||||
(extern constructor put_in_reg_mem put_in_reg_mem)
|
||||
|
||||
(type SyntheticAmode extern (enum))
|
||||
|
||||
(type ShiftKind extern
|
||||
(enum ShiftLeft
|
||||
ShiftRightLogical
|
||||
ShiftRightArithmetic
|
||||
RotateLeft
|
||||
RotateRight))
|
||||
|
||||
(type Imm8Reg extern
|
||||
(enum (Imm8 (imm u8))
|
||||
(Reg (reg Reg))))
|
||||
|
||||
(type CC extern
|
||||
(enum O
|
||||
NO
|
||||
B
|
||||
NB
|
||||
Z
|
||||
NZ
|
||||
BE
|
||||
NBE
|
||||
S
|
||||
NS
|
||||
L
|
||||
NL
|
||||
LE
|
||||
NLE
|
||||
P
|
||||
NP))
|
||||
|
||||
(type Avx512Opcode extern
|
||||
(enum Vcvtudq2ps
|
||||
Vpabsq
|
||||
Vpermi2b
|
||||
Vpmullq
|
||||
Vpopcntb))
|
||||
|
||||
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avx512vl_enabled () Type)
|
||||
(extern extractor avx512vl_enabled avx512vl_enabled)
|
||||
|
||||
(decl avx512dq_enabled () Type)
|
||||
(extern extractor avx512dq_enabled avx512dq_enabled)
|
||||
|
||||
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
|
||||
(decl imm8_from_value (Imm8Reg) Value)
|
||||
(extern extractor imm8_from_value imm8_from_value)
|
||||
|
||||
;; Extract a constant `RegMemImm.Imm` from a value operand.
|
||||
(decl simm32_from_value (RegMemImm) Value)
|
||||
(extern extractor simm32_from_value simm32_from_value)
|
||||
|
||||
;; Extract a constant `RegMemImm.Imm` from an `Imm64` immediate.
|
||||
(decl simm32_from_imm64 (RegMemImm) Imm64)
|
||||
(extern extractor simm32_from_imm64 simm32_from_imm64)
|
||||
|
||||
;; A load that can be sunk into another operation.
|
||||
(type SinkableLoad extern (enum))
|
||||
|
||||
;; Extract a `SinkableLoad` that works with `RegMemImm.Mem` from a value
|
||||
;; operand.
|
||||
(decl sinkable_load (SinkableLoad) Value)
|
||||
(extern extractor sinkable_load sinkable_load)
|
||||
|
||||
;; Sink a `SinkableLoad` into a `RegMemImm.Mem`.
|
||||
;;
|
||||
;; This is a side-effectful operation that notifies the context that the
|
||||
;; instruction that produced the `SinkableLoad` has been sunk into another
|
||||
;; instruction, and no longer needs to be lowered.
|
||||
(decl sink_load (SinkableLoad) RegMemImm)
|
||||
(extern constructor sink_load sink_load)
|
||||
|
||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||
;; effect on flags.
|
||||
(type ProducesFlags (enum (ProducesFlags (inst MInst) (result Reg))))
|
||||
|
||||
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
||||
(type ConsumesFlags (enum (ConsumesFlags (inst MInst) (result Reg))))
|
||||
|
||||
;; Combine flags-producing and -consuming instructions together, ensuring that
|
||||
;; they are emitted back-to-back and no other instructions can be emitted
|
||||
;; between them and potentially clobber the flags.
|
||||
;;
|
||||
;; Returns a `ValueRegs` where the first register is the result of the
|
||||
;; `ProducesFlags` instruction and the second is the result of the
|
||||
;; `ConsumesFlags` instruction.
|
||||
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
|
||||
(rule (with_flags (ProducesFlags.ProducesFlags producer_inst producer_result)
|
||||
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
(value_regs producer_result consumer_result)))
|
||||
|
||||
;; Like `with_flags` but returns only the result of the consumer operation.
|
||||
(decl with_flags_1 (ProducesFlags ConsumesFlags) Reg)
|
||||
(rule (with_flags_1 (ProducesFlags.ProducesFlags producer_inst _producer_result)
|
||||
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst)))
|
||||
consumer_result))
|
||||
|
||||
;; Like `with_flags` but allows two consumers of the same flags. The result is a
|
||||
;; `ValueRegs` containing the first consumer's result and then the second
|
||||
;; consumer's result.
|
||||
(decl with_flags_2 (ProducesFlags ConsumesFlags ConsumesFlags) ValueRegs)
|
||||
(rule (with_flags_2 (ProducesFlags.ProducesFlags producer_inst producer_result)
|
||||
(ConsumesFlags.ConsumesFlags consumer_inst_1 consumer_result_1)
|
||||
(ConsumesFlags.ConsumesFlags consumer_inst_2 consumer_result_2))
|
||||
(let ((_x Unit (emit producer_inst))
|
||||
(_y Unit (emit consumer_inst_1))
|
||||
(_z Unit (emit consumer_inst_2)))
|
||||
(value_regs consumer_result_1 consumer_result_2)))
|
||||
|
||||
;;;; Helpers for Sign/Zero Extending ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type ExtendKind (enum Sign Zero))
|
||||
|
||||
(type ExtMode extern (enum BL BQ WL WQ LQ))
|
||||
|
||||
;; `ExtMode::new`
|
||||
(decl ext_mode (u16 u16) ExtMode)
|
||||
(extern constructor ext_mode ext_mode)
|
||||
|
||||
;; Put the given value into a register, but extended as the given type.
|
||||
(decl extend_to_reg (Value Type ExtendKind) Reg)
|
||||
|
||||
;; If the value is already of the requested type, no extending is necessary.
|
||||
(rule (extend_to_reg (and val (value_type ty)) =ty _kind)
|
||||
(put_in_reg val))
|
||||
|
||||
(rule (extend_to_reg (and val (value_type from_ty))
|
||||
to_ty
|
||||
kind)
|
||||
(let ((from_bits u16 (ty_bits from_ty))
|
||||
;; Use `operand_size_of_type` so that we clamp the output to 32-
|
||||
;; or 64-bit width types.
|
||||
(to_bits u16 (operand_size_bits (operand_size_of_type to_ty))))
|
||||
(extend kind
|
||||
to_ty
|
||||
(ext_mode from_bits to_bits)
|
||||
(put_in_reg_mem val))))
|
||||
|
||||
;; Do a sign or zero extension of the given `RegMem`.
|
||||
(decl extend (ExtendKind Type ExtMode RegMem) Reg)
|
||||
|
||||
;; Zero extending uses `movzx`.
|
||||
(rule (extend (ExtendKind.Zero) ty mode src)
|
||||
(movzx ty mode src))
|
||||
|
||||
;; Sign extending uses `movsx`.
|
||||
(rule (extend (ExtendKind.Sign) ty mode src)
|
||||
(movsx ty mode src))
|
||||
|
||||
;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; These constructors create SSA-style `MInst`s. It is their responsibility to
|
||||
;; maintain the invariant that each temporary register they allocate and define
|
||||
;; only gets defined once.
|
||||
|
||||
;; Emit an instruction.
|
||||
;;
|
||||
;; This is low-level and side-effectful; it should only be used as an
|
||||
;; implementation detail by helpers that preserve the SSA facade themselves.
|
||||
(decl emit (MInst) Unit)
|
||||
(extern constructor emit emit)
|
||||
|
||||
;; Helper for emitting `MInst.AluRmiR` instructions.
|
||||
(decl alu_rmi_r (Type AluRmiROpcode Reg RegMemImm) Reg)
|
||||
(rule (alu_rmi_r ty opcode src1 src2)
|
||||
(let ((dst WritableReg (temp_writable_reg ty))
|
||||
(size OperandSize (operand_size_of_type ty))
|
||||
(_ Unit (emit (MInst.AluRmiR size opcode src1 src2 dst))))
|
||||
(writable_reg_to_reg dst)))
|
||||
|
||||
;; Emit an `add`: forwards to `alu_rmi_r` with `AluRmiROpcode.Add` and returns
;; the result register.
(decl add (Type Reg RegMemImm) Reg)
(rule (add ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Add) lhs rhs))
|
||||
|
||||
;; Build an `add` whose flags are also used. The instruction is not emitted
;; here; it is returned inside `ProducesFlags` together with the register that
;; receives the result.
(decl add_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (add_with_flags ty lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ProducesFlags.ProducesFlags
         (MInst.AluRmiR width (AluRmiROpcode.Add) lhs rhs w_dst)
         (writable_reg_to_reg w_dst))))
|
||||
|
||||
;; Build an `adc`. The instruction is not emitted here; it is returned inside
;; `ConsumesFlags` together with the register that receives the result.
(decl adc (Type Reg RegMemImm) ConsumesFlags)
(rule (adc ty lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ConsumesFlags.ConsumesFlags
         (MInst.AluRmiR width (AluRmiROpcode.Adc) lhs rhs w_dst)
         (writable_reg_to_reg w_dst))))
|
||||
|
||||
;; Emit a `sub`: forwards to `alu_rmi_r` with `AluRmiROpcode.Sub` and returns
;; the result register.
(decl sub (Type Reg RegMemImm) Reg)
(rule (sub ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Sub) lhs rhs))
|
||||
|
||||
;; Build a `sub` whose flags are also used. The instruction is not emitted
;; here; it is returned inside `ProducesFlags` together with the register that
;; receives the result.
(decl sub_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (sub_with_flags ty lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ProducesFlags.ProducesFlags
         (MInst.AluRmiR width (AluRmiROpcode.Sub) lhs rhs w_dst)
         (writable_reg_to_reg w_dst))))
|
||||
|
||||
;; Build an `sbb`. The instruction is not emitted here; it is returned inside
;; `ConsumesFlags` together with the register that receives the result.
(decl sbb (Type Reg RegMemImm) ConsumesFlags)
(rule (sbb ty lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (width OperandSize (operand_size_of_type ty)))
        (ConsumesFlags.ConsumesFlags
         (MInst.AluRmiR width (AluRmiROpcode.Sbb) lhs rhs w_dst)
         (writable_reg_to_reg w_dst))))
|
||||
|
||||
;; Emit a `mul`: forwards to `alu_rmi_r` with `AluRmiROpcode.Mul` and returns
;; the result register.
(decl mul (Type Reg RegMemImm) Reg)
(rule (mul ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Mul) lhs rhs))
|
||||
|
||||
;; Emit an `and`: forwards to `alu_rmi_r` with `AluRmiROpcode.And`.
;;
;; The `m_` prefix (short for "mach inst") keeps this term from clashing with
;; the ISLE-builtin `and` operator.
(decl m_and (Type Reg RegMemImm) Reg)
(rule (m_and ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.And) lhs rhs))
|
||||
|
||||
;; Emit an `or`: forwards to `alu_rmi_r` with `AluRmiROpcode.Or` and returns
;; the result register.
(decl or (Type Reg RegMemImm) Reg)
(rule (or ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Or) lhs rhs))
|
||||
|
||||
;; Emit an `xor`: forwards to `alu_rmi_r` with `AluRmiROpcode.Xor` and returns
;; the result register.
(decl xor (Type Reg RegMemImm) Reg)
(rule (xor ty lhs rhs)
      (alu_rmi_r ty (AluRmiROpcode.Xor) lhs rhs))
|
||||
|
||||
;; Materialize an immediate in a fresh temporary of type `ty`.
;;
;; General case: a single `MInst.Imm` whose operand size is derived from `ty`.
(decl imm (Type u64) Reg)
(rule (imm ty bits)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.Imm (operand_size_of_type ty) bits w_dst))))
        dst))

;; Externally implemented extractor used by the 32-bit special case below.
;; NOTE(review): judging by the name it matches only non-zero values that fit
;; in 32 bits -- confirm against the extern implementation.
(decl nonzero_u64_fits_in_u32 (u64) u64)
(extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32)

;; Special case for when a 64-bit immediate fits into 32 bits: use a 32-bit
;; move that zero-extends the value, which has a smaller encoding.
(rule (imm $I64 (nonzero_u64_fits_in_u32 low32))
      (let ((w_dst WritableReg (temp_writable_reg $I64))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.Imm (OperandSize.Size32) low32 w_dst))))
        dst))

;; Special case for zero immediates: turn them into an `xor r, r`, where the
;; fresh temporary is used as both sources and as the destination.
(rule (imm ty 0)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.AluRmiR (operand_size_of_type ty)
                                         (AluRmiROpcode.Xor)
                                         dst
                                         (RegMemImm.Reg dst)
                                         w_dst))))
        dst))
|
||||
|
||||
;; Emit an `MInst.ShiftR` of the given `kind`. The result goes into a freshly
;; allocated temporary of type `ty`, which is returned as a plain `Reg`; the
;; operand size is derived from `ty`.
(decl shift_r (Type ShiftKind Reg Imm8Reg) Reg)
(rule (shift_r ty kind src amount)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.ShiftR (operand_size_of_type ty)
                                        kind
                                        src
                                        amount
                                        w_dst))))
        dst))
|
||||
|
||||
;; Emit a rotate-left: forwards to `shift_r` with `ShiftKind.RotateLeft`.
;;
;; The `m_` prefix (short for "mach inst") disambiguates this from clif's
;; `rotl`.
(decl m_rotl (Type Reg Imm8Reg) Reg)
(rule (m_rotl ty src amount)
      (shift_r ty (ShiftKind.RotateLeft) src amount))
|
||||
|
||||
;; Emit a `shl`: forwards to `shift_r` with `ShiftKind.ShiftLeft`.
(decl shl (Type Reg Imm8Reg) Reg)
(rule (shl ty src amount)
      (shift_r ty (ShiftKind.ShiftLeft) src amount))
|
||||
|
||||
;; Emit a logical shift-right: forwards to `shift_r` with
;; `ShiftKind.ShiftRightLogical`.
(decl shr (Type Reg Imm8Reg) Reg)
(rule (shr ty src amount)
      (shift_r ty (ShiftKind.ShiftRightLogical) src amount))
|
||||
|
||||
;; Emit an arithmetic shift-right: forwards to `shift_r` with
;; `ShiftKind.ShiftRightArithmetic`.
(decl sar (Type Reg Imm8Reg) Reg)
(rule (sar ty src amount)
      (shift_r ty (ShiftKind.ShiftRightArithmetic) src amount))
|
||||
|
||||
;; Build an `MInst.CmpRmiR` wrapped in `ProducesFlags`. The instruction is not
;; emitted here, and the result-register slot is filled with `(invalid_reg)`.
(decl cmp_rmi_r (OperandSize CmpOpcode RegMemImm Reg) ProducesFlags)
(rule (cmp_rmi_r size opcode rmi reg)
      (ProducesFlags.ProducesFlags (MInst.CmpRmiR size opcode rmi reg)
                                   (invalid_reg)))
|
||||
|
||||
;; Build a `cmp`: forwards to `cmp_rmi_r` with `CmpOpcode.Cmp`.
(decl cmp (OperandSize RegMemImm Reg) ProducesFlags)
(rule (cmp size rmi reg)
      (cmp_rmi_r size (CmpOpcode.Cmp) rmi reg))
|
||||
|
||||
;; Build a `test`: forwards to `cmp_rmi_r` with `CmpOpcode.Test`.
(decl test (OperandSize RegMemImm Reg) ProducesFlags)
(rule (test size rmi reg)
      (cmp_rmi_r size (CmpOpcode.Test) rmi reg))
|
||||
|
||||
;; Build an `MInst.Cmove` wrapped in `ConsumesFlags`. The instruction is not
;; emitted here; the destination is a fresh temporary of type `ty` and the
;; operand size is derived from `ty`.
(decl cmove (Type CC RegMem Reg) ConsumesFlags)
(rule (cmove ty cc consequent alternative)
      (let ((w_dst WritableReg (temp_writable_reg ty)))
        (ConsumesFlags.ConsumesFlags
         (MInst.Cmove (operand_size_of_type ty) cc consequent alternative w_dst)
         (writable_reg_to_reg w_dst))))
|
||||
|
||||
;; Emit an `MInst.MovzxRmR` with the given extension mode, writing into a fresh
;; temporary of type `ty`, and return that temporary as a plain `Reg`.
(decl movzx (Type ExtMode RegMem) Reg)
(rule (movzx ty ext operand)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.MovzxRmR ext operand w_dst))))
        dst))
|
||||
|
||||
;; Emit an `MInst.MovsxRmR` with the given extension mode, writing into a fresh
;; temporary of type `ty`, and return that temporary as a plain `Reg`.
(decl movsx (Type ExtMode RegMem) Reg)
(rule (movsx ty ext operand)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.MovsxRmR ext operand w_dst))))
        dst))
|
||||
|
||||
;; Emit an `MInst.XmmRmR` for `op`, writing into a fresh temporary of type
;; `ty`, and return that temporary as a plain `Reg`.
(decl xmm_rm_r (Type SseOpcode Reg RegMem) Reg)
(rule (xmm_rm_r ty op lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg ty))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.XmmRmR op lhs rhs w_dst))))
        dst))
|
||||
|
||||
;; Build a `paddb` via `xmm_rm_r`, using `SseOpcode.Paddb` and an
;; `$I8X16`-typed result register.
(decl paddb (Reg RegMem) Reg)
(rule (paddb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddb) lhs rhs))
|
||||
|
||||
;; Build a `paddw` via `xmm_rm_r`, using `SseOpcode.Paddw` and an
;; `$I16X8`-typed result register.
(decl paddw (Reg RegMem) Reg)
(rule (paddw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddw) lhs rhs))
|
||||
|
||||
;; Build a `paddd` via `xmm_rm_r`, using `SseOpcode.Paddd` and an
;; `$I32X4`-typed result register.
(decl paddd (Reg RegMem) Reg)
(rule (paddd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Paddd) lhs rhs))
|
||||
|
||||
;; Build a `paddq` via `xmm_rm_r`, using `SseOpcode.Paddq` and an
;; `$I64X2`-typed result register.
(decl paddq (Reg RegMem) Reg)
(rule (paddq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Paddq) lhs rhs))
|
||||
|
||||
;; Build a `paddsb` via `xmm_rm_r`, using `SseOpcode.Paddsb` and an
;; `$I8X16`-typed result register.
(decl paddsb (Reg RegMem) Reg)
(rule (paddsb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddsb) lhs rhs))
|
||||
|
||||
;; Build a `paddsw` via `xmm_rm_r`, using `SseOpcode.Paddsw` and an
;; `$I16X8`-typed result register.
(decl paddsw (Reg RegMem) Reg)
(rule (paddsw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddsw) lhs rhs))
|
||||
|
||||
;; Build a `paddusb` via `xmm_rm_r`, using `SseOpcode.Paddusb` and an
;; `$I8X16`-typed result register.
(decl paddusb (Reg RegMem) Reg)
(rule (paddusb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Paddusb) lhs rhs))
|
||||
|
||||
;; Build a `paddusw` via `xmm_rm_r`, using `SseOpcode.Paddusw` and an
;; `$I16X8`-typed result register.
(decl paddusw (Reg RegMem) Reg)
(rule (paddusw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Paddusw) lhs rhs))
|
||||
|
||||
;; Build a `psubb` via `xmm_rm_r`, using `SseOpcode.Psubb` and an
;; `$I8X16`-typed result register.
(decl psubb (Reg RegMem) Reg)
(rule (psubb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubb) lhs rhs))
|
||||
|
||||
;; Build a `psubw` via `xmm_rm_r`, using `SseOpcode.Psubw` and an
;; `$I16X8`-typed result register.
(decl psubw (Reg RegMem) Reg)
(rule (psubw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubw) lhs rhs))
|
||||
|
||||
;; Build a `psubd` via `xmm_rm_r`, using `SseOpcode.Psubd` and an
;; `$I32X4`-typed result register.
(decl psubd (Reg RegMem) Reg)
(rule (psubd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Psubd) lhs rhs))
|
||||
|
||||
;; Build a `psubq` via `xmm_rm_r`, using `SseOpcode.Psubq` and an
;; `$I64X2`-typed result register.
(decl psubq (Reg RegMem) Reg)
(rule (psubq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Psubq) lhs rhs))
|
||||
|
||||
;; Build a `psubsb` via `xmm_rm_r`, using `SseOpcode.Psubsb` and an
;; `$I8X16`-typed result register.
(decl psubsb (Reg RegMem) Reg)
(rule (psubsb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubsb) lhs rhs))
|
||||
|
||||
;; Build a `psubsw` via `xmm_rm_r`, using `SseOpcode.Psubsw` and an
;; `$I16X8`-typed result register.
(decl psubsw (Reg RegMem) Reg)
(rule (psubsw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubsw) lhs rhs))
|
||||
|
||||
;; Build a `psubusb` via `xmm_rm_r`, using `SseOpcode.Psubusb` and an
;; `$I8X16`-typed result register.
(decl psubusb (Reg RegMem) Reg)
(rule (psubusb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Psubusb) lhs rhs))
|
||||
|
||||
;; Build a `psubusw` via `xmm_rm_r`, using `SseOpcode.Psubusw` and an
;; `$I16X8`-typed result register.
(decl psubusw (Reg RegMem) Reg)
(rule (psubusw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Psubusw) lhs rhs))
|
||||
|
||||
;; Build a `pavgb` via `xmm_rm_r`, using `SseOpcode.Pavgb` and an
;; `$I8X16`-typed result register.
(decl pavgb (Reg RegMem) Reg)
(rule (pavgb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pavgb) lhs rhs))
|
||||
|
||||
;; Build a `pavgw` via `xmm_rm_r`, using `SseOpcode.Pavgw` and an
;; `$I16X8`-typed result register.
(decl pavgw (Reg RegMem) Reg)
(rule (pavgw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pavgw) lhs rhs))
|
||||
|
||||
;; Build a `pand` via `xmm_rm_r`, using `SseOpcode.Pand` and an
;; `$F32X4`-typed result register.
(decl pand (Reg RegMem) Reg)
(rule (pand lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Pand) lhs rhs))
|
||||
|
||||
;; Build an `andps` via `xmm_rm_r`, using `SseOpcode.Andps` and an
;; `$F32X4`-typed result register.
(decl andps (Reg RegMem) Reg)
(rule (andps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Andps) lhs rhs))
|
||||
|
||||
;; Build an `andpd` via `xmm_rm_r`, using `SseOpcode.Andpd` and an
;; `$F64X2`-typed result register.
(decl andpd (Reg RegMem) Reg)
(rule (andpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Andpd) lhs rhs))
|
||||
|
||||
;; Build a `por` via `xmm_rm_r`, using `SseOpcode.Por` and an
;; `$F32X4`-typed result register.
(decl por (Reg RegMem) Reg)
(rule (por lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Por) lhs rhs))
|
||||
|
||||
;; Build an `orps` via `xmm_rm_r`, using `SseOpcode.Orps` and an
;; `$F32X4`-typed result register.
(decl orps (Reg RegMem) Reg)
(rule (orps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Orps) lhs rhs))
|
||||
|
||||
;; Build an `orpd` via `xmm_rm_r`, using `SseOpcode.Orpd` and an
;; `$F64X2`-typed result register.
(decl orpd (Reg RegMem) Reg)
(rule (orpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Orpd) lhs rhs))
|
||||
|
||||
;; Build a `pxor` via `xmm_rm_r`, using `SseOpcode.Pxor` and an
;; `$I8X16`-typed result register.
(decl pxor (Reg RegMem) Reg)
(rule (pxor lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pxor) lhs rhs))
|
||||
|
||||
;; Build an `xorps` via `xmm_rm_r`, using `SseOpcode.Xorps` and an
;; `$F32X4`-typed result register.
(decl xorps (Reg RegMem) Reg)
(rule (xorps lhs rhs)
      (xmm_rm_r $F32X4 (SseOpcode.Xorps) lhs rhs))
|
||||
|
||||
;; Build an `xorpd` via `xmm_rm_r`, using `SseOpcode.Xorpd` and an
;; `$F64X2`-typed result register.
(decl xorpd (Reg RegMem) Reg)
(rule (xorpd lhs rhs)
      (xmm_rm_r $F64X2 (SseOpcode.Xorpd) lhs rhs))
|
||||
|
||||
;; Build a `pmullw` via `xmm_rm_r`, using `SseOpcode.Pmullw` and an
;; `$I16X8`-typed result register.
(decl pmullw (Reg RegMem) Reg)
(rule (pmullw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmullw) lhs rhs))
|
||||
|
||||
;; Helper for creating `pmulld` instructions.
;;
;; The result temporary is typed `$I32X4`: `pmulld` multiplies 32-bit lanes,
;; so this matches the other doubleword helpers (`paddd`, `psubd`) rather than
;; the `$I16X8` used by the word-sized multiplies.
(decl pmulld (Reg RegMem) Reg)
(rule (pmulld src1 src2)
      (xmm_rm_r $I32X4 (SseOpcode.Pmulld) src1 src2))
|
||||
|
||||
;; Build a `pmulhw` via `xmm_rm_r`, using `SseOpcode.Pmulhw` and an
;; `$I16X8`-typed result register.
(decl pmulhw (Reg RegMem) Reg)
(rule (pmulhw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmulhw) lhs rhs))
|
||||
|
||||
;; Build a `pmulhuw` via `xmm_rm_r`, using `SseOpcode.Pmulhuw` and an
;; `$I16X8`-typed result register.
(decl pmulhuw (Reg RegMem) Reg)
(rule (pmulhuw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pmulhuw) lhs rhs))
|
||||
|
||||
;; Helper for creating `pmuldq` instructions.
;;
;; The result temporary is typed `$I64X2`: `pmuldq` produces two 64-bit
;; products, so this matches its unsigned sibling `pmuludq` rather than the
;; `$I16X8` used by the word-sized multiplies.
(decl pmuldq (Reg RegMem) Reg)
(rule (pmuldq src1 src2)
      (xmm_rm_r $I64X2 (SseOpcode.Pmuldq) src1 src2))
|
||||
|
||||
;; Build a `pmuludq` via `xmm_rm_r`, using `SseOpcode.Pmuludq` and an
;; `$I64X2`-typed result register.
(decl pmuludq (Reg RegMem) Reg)
(rule (pmuludq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Pmuludq) lhs rhs))
|
||||
|
||||
;; Build a `punpckhwd` via `xmm_rm_r`, using `SseOpcode.Punpckhwd` and an
;; `$I16X8`-typed result register.
(decl punpckhwd (Reg RegMem) Reg)
(rule (punpckhwd lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Punpckhwd) lhs rhs))
|
||||
|
||||
;; Build a `punpcklwd` via `xmm_rm_r`, using `SseOpcode.Punpcklwd` and an
;; `$I16X8`-typed result register.
(decl punpcklwd (Reg RegMem) Reg)
(rule (punpcklwd lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Punpcklwd) lhs rhs))
|
||||
|
||||
;; Emit an `MInst.XmmRmRImm` for `op`, writing into a fresh `$I8X16`-typed
;; temporary, and return that temporary as a plain `Reg`.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op lhs rhs imm8 size)
      (let ((w_dst WritableReg (temp_writable_reg $I8X16))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.XmmRmRImm op lhs rhs w_dst imm8 size))))
        dst))
|
||||
|
||||
;; Build a `palignr`: forwards to `xmm_rm_r_imm` with `SseOpcode.Palignr`.
(decl palignr (Reg RegMem u8 OperandSize) Reg)
(rule (palignr lhs rhs imm8 size)
      (xmm_rm_r_imm (SseOpcode.Palignr) lhs rhs imm8 size))
|
||||
|
||||
;; Emit a `pshufd` as an `MInst.XmmRmRImm`. There is only one real source
;; operand, so the fresh `$I8X16` temporary is passed both as the first source
;; and as the destination; that temporary is returned.
(decl pshufd (RegMem u8 OperandSize) Reg)
(rule (pshufd operand imm8 size)
      (let ((w_out WritableReg (temp_writable_reg $I8X16))
            (out Reg (writable_reg_to_reg w_out))
            (_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pshufd)
                                           out
                                           operand
                                           w_out
                                           imm8
                                           size))))
        out))
|
||||
|
||||
;; Emit an `MInst.XmmUnaryRmR` for `op`, writing into a fresh `$I8X16`-typed
;; temporary, and return that temporary as a plain `Reg`.
(decl xmm_unary_rm_r (SseOpcode RegMem) Reg)
(rule (xmm_unary_rm_r op operand)
      (let ((w_dst WritableReg (temp_writable_reg $I8X16))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.XmmUnaryRmR op operand w_dst))))
        dst))
|
||||
|
||||
;; Build a `pmovsxbw`: forwards to `xmm_unary_rm_r` with `SseOpcode.Pmovsxbw`.
(decl pmovsxbw (RegMem) Reg)
(rule (pmovsxbw operand)
      (xmm_unary_rm_r (SseOpcode.Pmovsxbw) operand))
|
||||
|
||||
;; Build a `pmovzxbw`: forwards to `xmm_unary_rm_r` with `SseOpcode.Pmovzxbw`.
(decl pmovzxbw (RegMem) Reg)
(rule (pmovzxbw operand)
      (xmm_unary_rm_r (SseOpcode.Pmovzxbw) operand))
|
||||
|
||||
;; Emit an `MInst.XmmRmREvex` for `op`, writing into a fresh `$I8X16`-typed
;; temporary, and return that temporary as a plain `Reg`. Note the operand
;; order: the `RegMem` comes first and the `Reg` second.
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
(rule (xmm_rm_r_evex op rm reg)
      (let ((w_dst WritableReg (temp_writable_reg $I8X16))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.XmmRmREvex op rm reg w_dst))))
        dst))
|
||||
|
||||
;; Build a `vpmullq`: forwards to `xmm_rm_r_evex` with `Avx512Opcode.Vpmullq`.
;;
;; Requires AVX-512 vl and dq.
(decl vpmullq (RegMem Reg) Reg)
(rule (vpmullq rm reg)
      (xmm_rm_r_evex (Avx512Opcode.Vpmullq) rm reg))
|
||||
|
||||
;; Emit an `MInst.XmmRmiReg` for `op`, writing into a fresh `$I8X16`-typed
;; temporary, and return that temporary as a plain `Reg`.
(decl xmm_rmi_reg (SseOpcode Reg RegMemImm) Reg)
(rule (xmm_rmi_reg op lhs rhs)
      (let ((w_dst WritableReg (temp_writable_reg $I8X16))
            (dst Reg (writable_reg_to_reg w_dst))
            (_ Unit (emit (MInst.XmmRmiReg op lhs rhs w_dst))))
        dst))
|
||||
|
||||
;; Build a `psllq`: forwards to `xmm_rmi_reg` with `SseOpcode.Psllq`.
(decl psllq (Reg RegMemImm) Reg)
(rule (psllq src amount)
      (xmm_rmi_reg (SseOpcode.Psllq) src amount))
|
||||
|
||||
;; Build a `psrlq`: forwards to `xmm_rmi_reg` with `SseOpcode.Psrlq`.
(decl psrlq (Reg RegMemImm) Reg)
(rule (psrlq src amount)
      (xmm_rmi_reg (SseOpcode.Psrlq) src amount))
|
||||
@@ -1,14 +1,13 @@
|
||||
//! Instruction operand sub-components (aka "parts"): definitions and printing.
|
||||
|
||||
use super::regs::{self, show_ireg_sized};
|
||||
use super::EmitState;
|
||||
use super::{EmitState, RegMapper};
|
||||
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||
use crate::ir::{MemFlags, Type};
|
||||
use crate::isa::x64::inst::Inst;
|
||||
use crate::machinst::*;
|
||||
use regalloc::{
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
|
||||
RegUsageMapper, Writable,
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable,
|
||||
};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt;
|
||||
@@ -175,7 +174,7 @@ impl SyntheticAmode {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
pub(crate) fn map_uses<RM: RegMapper>(&mut self, map: &RM) {
|
||||
match self {
|
||||
SyntheticAmode::Real(addr) => addr.map_uses(map),
|
||||
SyntheticAmode::NominalSPOffset { .. } => {
|
||||
@@ -285,6 +284,25 @@ impl PrettyPrintSized for RegMemImm {
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an 8-bit integer immediate or a register.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Imm8Reg {
|
||||
Imm8 { imm: u8 },
|
||||
Reg { reg: Reg },
|
||||
}
|
||||
|
||||
impl From<u8> for Imm8Reg {
|
||||
fn from(imm: u8) -> Self {
|
||||
Self::Imm8 { imm }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Reg> for Imm8Reg {
|
||||
fn from(reg: Reg) -> Self {
|
||||
Self::Reg { reg }
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32, 64, or 128 bit value.
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -147,14 +147,16 @@ pub(crate) fn emit(
|
||||
Inst::AluRmiR {
|
||||
size,
|
||||
op,
|
||||
src,
|
||||
src1,
|
||||
src2,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, reg_g.to_reg());
|
||||
let mut rex = RexFlags::from(*size);
|
||||
if *op == AluRmiROpcode::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
@@ -213,7 +215,7 @@ pub(crate) fn emit(
|
||||
};
|
||||
assert!(!(is_8bit && *size == OperandSize::Size64));
|
||||
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
if is_8bit {
|
||||
rex.always_emit_if_8bit_needed(*reg_e);
|
||||
@@ -323,8 +325,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Not { size, src } => {
|
||||
let rex_flags = RexFlags::from((*size, src.to_reg()));
|
||||
Inst::Not { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -333,12 +336,13 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = 2;
|
||||
let enc_src = int_reg_enc(src.to_reg());
|
||||
let enc_src = int_reg_enc(dst.to_reg());
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
|
||||
}
|
||||
|
||||
Inst::Neg { size, src } => {
|
||||
let rex_flags = RexFlags::from((*size, src.to_reg()));
|
||||
Inst::Neg { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -347,15 +351,21 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = 3;
|
||||
let enc_src = int_reg_enc(src.to_reg());
|
||||
let enc_src = int_reg_enc(dst.to_reg());
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
|
||||
}
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
dividend,
|
||||
divisor,
|
||||
dst_quotient,
|
||||
dst_remainder,
|
||||
} => {
|
||||
debug_assert_eq!(*dividend, regs::rax());
|
||||
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -397,7 +407,18 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
Inst::MulHi {
|
||||
size,
|
||||
signed,
|
||||
src1,
|
||||
src2,
|
||||
dst_lo,
|
||||
dst_hi,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, regs::rax());
|
||||
debug_assert_eq!(dst_lo.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_hi.to_reg(), regs::rdx());
|
||||
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
let prefix = match size {
|
||||
OperandSize::Size16 => LegacyPrefixes::_66,
|
||||
@@ -407,7 +428,7 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
let subopcode = if *signed { 5 } else { 4 };
|
||||
match rhs {
|
||||
match src2 {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
@@ -421,28 +442,39 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::SignExtendData { size } => match size {
|
||||
OperandSize::Size8 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x98);
|
||||
Inst::SignExtendData { size, src, dst } => {
|
||||
debug_assert_eq!(*src, regs::rax());
|
||||
debug_assert_eq!(dst.to_reg(), regs::rdx());
|
||||
match size {
|
||||
OperandSize::Size8 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x98);
|
||||
}
|
||||
OperandSize::Size16 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
OperandSize::Size32 => sink.put1(0x99),
|
||||
OperandSize::Size64 => {
|
||||
sink.put1(0x48);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
}
|
||||
OperandSize::Size16 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
OperandSize::Size32 => sink.put1(0x99),
|
||||
OperandSize::Size64 => {
|
||||
sink.put1(0x48);
|
||||
sink.put1(0x99);
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
kind,
|
||||
size,
|
||||
dividend,
|
||||
divisor,
|
||||
tmp,
|
||||
dst_quotient,
|
||||
dst_remainder,
|
||||
} => {
|
||||
debug_assert_eq!(*dividend, regs::rax());
|
||||
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
|
||||
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
|
||||
|
||||
// Generates the following code sequence:
|
||||
//
|
||||
// ;; check divide by zero:
|
||||
@@ -792,9 +824,11 @@ pub(crate) fn emit(
|
||||
Inst::ShiftR {
|
||||
size,
|
||||
kind,
|
||||
src,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::RotateLeft => 0,
|
||||
ShiftKind::RotateRight => 1,
|
||||
@@ -805,7 +839,8 @@ pub(crate) fn emit(
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
match num_bits {
|
||||
None => {
|
||||
Imm8Reg::Reg { reg } => {
|
||||
debug_assert_eq!(*reg, regs::rcx());
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xD2, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xD3, LegacyPrefixes::_66),
|
||||
@@ -820,7 +855,7 @@ pub(crate) fn emit(
|
||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
|
||||
}
|
||||
|
||||
Some(num_bits) => {
|
||||
Imm8Reg::Imm8 { imm: num_bits } => {
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xC0, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xC1, LegacyPrefixes::_66),
|
||||
@@ -840,10 +875,16 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::XmmRmiReg { opcode, src, dst } => {
|
||||
Inst::XmmRmiReg {
|
||||
opcode,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, dst.to_reg());
|
||||
let rex = RexFlags::clear_w();
|
||||
let prefix = LegacyPrefixes::_66;
|
||||
if let RegMemImm::Imm { simm32 } = src {
|
||||
if let RegMemImm::Imm { simm32 } = src2 {
|
||||
let (opcode_bytes, reg_digit) = match opcode {
|
||||
SseOpcode::Psllw => (0x0F71, 6),
|
||||
SseOpcode::Pslld => (0x0F72, 6),
|
||||
@@ -874,7 +915,7 @@ pub(crate) fn emit(
|
||||
_ => panic!("invalid opcode: {}", opcode),
|
||||
};
|
||||
|
||||
match src {
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
|
||||
}
|
||||
@@ -993,9 +1034,11 @@ pub(crate) fn emit(
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
src,
|
||||
consequent,
|
||||
alternative,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*alternative, reg_g.to_reg());
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
let prefix = match size {
|
||||
OperandSize::Size16 => LegacyPrefixes::_66,
|
||||
@@ -1004,7 +1047,7 @@ pub(crate) fn emit(
|
||||
_ => unreachable!("invalid size spec for cmove"),
|
||||
};
|
||||
let opcode = 0x0F40 + cc.get_enc() as u32;
|
||||
match src {
|
||||
match consequent {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
|
||||
}
|
||||
@@ -1433,9 +1476,11 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmR {
|
||||
op,
|
||||
src: src_e,
|
||||
src1,
|
||||
src2: src_e,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, reg_g.to_reg());
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode, length) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
|
||||
@@ -1678,11 +1723,13 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmRmRImm {
|
||||
op,
|
||||
src,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
imm,
|
||||
size,
|
||||
} => {
|
||||
debug_assert_eq!(*src1, dst.to_reg());
|
||||
let (prefix, opcode, len) = match op {
|
||||
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
||||
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
|
||||
@@ -1713,7 +1760,7 @@ pub(crate) fn emit(
|
||||
// `src` in ModRM's r/m field.
|
||||
_ => false,
|
||||
};
|
||||
match src {
|
||||
match src2 {
|
||||
RegMem::Reg { reg } => {
|
||||
if regs_swapped {
|
||||
emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
|
||||
@@ -2403,8 +2450,17 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::LockCmpxchg { ty, src, dst } => {
|
||||
// lock cmpxchg{b,w,l,q} %src, (dst)
|
||||
Inst::LockCmpxchg {
|
||||
ty,
|
||||
replacement,
|
||||
expected,
|
||||
mem,
|
||||
dst_old,
|
||||
} => {
|
||||
debug_assert_eq!(*expected, regs::rax());
|
||||
debug_assert_eq!(dst_old.to_reg(), regs::rax());
|
||||
|
||||
// lock cmpxchg{b,w,l,q} %replacement, (mem)
|
||||
// Note that 0xF0 is the Lock prefix.
|
||||
let (prefix, opcodes) = match *ty {
|
||||
types::I8 => (LegacyPrefixes::_F0, 0x0FB0),
|
||||
@@ -2413,12 +2469,34 @@ pub(crate) fn emit(
|
||||
types::I64 => (LegacyPrefixes::_F0, 0x0FB1),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rex = RexFlags::from((OperandSize::from_ty(*ty), *src));
|
||||
let amode = dst.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
|
||||
let rex = RexFlags::from((OperandSize::from_ty(*ty), *replacement));
|
||||
let amode = mem.finalize(state, sink);
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcodes,
|
||||
2,
|
||||
*replacement,
|
||||
&amode,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { ty, op } => {
|
||||
Inst::AtomicRmwSeq {
|
||||
ty,
|
||||
op,
|
||||
address,
|
||||
operand,
|
||||
temp,
|
||||
dst_old,
|
||||
} => {
|
||||
debug_assert_eq!(*address, regs::r9());
|
||||
debug_assert_eq!(*operand, regs::r10());
|
||||
debug_assert_eq!(temp.to_reg(), regs::r11());
|
||||
debug_assert_eq!(dst_old.to_reg(), regs::rax());
|
||||
|
||||
// Emit this:
|
||||
//
|
||||
// mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
|
||||
@@ -2516,8 +2594,10 @@ pub(crate) fn emit(
|
||||
// No need to call `add_trap` here, since the `i4` emit will do that.
|
||||
let i4 = Inst::LockCmpxchg {
|
||||
ty: *ty,
|
||||
src: r11,
|
||||
dst: amode.into(),
|
||||
replacement: r11,
|
||||
expected: regs::rax(),
|
||||
mem: amode.into(),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
};
|
||||
i4.emit(sink, info, state);
|
||||
|
||||
|
||||
@@ -4199,8 +4199,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rbx,
|
||||
dst: am1,
|
||||
mem: am1,
|
||||
replacement: rbx,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0410FB09C9241010000",
|
||||
"lock cmpxchgb %bl, 321(%r10,%rdx,4)",
|
||||
@@ -4209,8 +4211,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rdx,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rdx,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F00FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
|
||||
@@ -4218,8 +4222,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0400FB0B4F1C7CFFFFF",
|
||||
"lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
|
||||
@@ -4227,8 +4233,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB094F1C7CFFFFF",
|
||||
"lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
|
||||
@@ -4236,8 +4244,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I8,
|
||||
src: r15,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r15,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB0BCF1C7CFFFFF",
|
||||
"lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
|
||||
@@ -4246,8 +4256,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"66F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
|
||||
@@ -4255,8 +4267,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I16,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"66F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
|
||||
@@ -4265,8 +4279,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F00FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
|
||||
@@ -4274,8 +4290,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I32,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0440FB194F1C7CFFFFF",
|
||||
"lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
|
||||
@@ -4284,8 +4302,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: rsi,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: rsi,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F0480FB1B4F1C7CFFFFF",
|
||||
"lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
|
||||
@@ -4293,8 +4313,10 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::LockCmpxchg {
|
||||
ty: types::I64,
|
||||
src: r10,
|
||||
dst: am2.clone(),
|
||||
mem: am2.clone(),
|
||||
replacement: r10,
|
||||
expected: rax,
|
||||
dst_old: w_rax,
|
||||
},
|
||||
"F04C0FB194F1C7CFFFFF",
|
||||
"lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
|
||||
@@ -4302,27 +4324,62 @@ fn test_x64_emit() {
|
||||
|
||||
// AtomicRmwSeq
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I8,
|
||||
op: inst_common::AtomicRmwOp::Or,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
|
||||
"atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I16,
|
||||
op: inst_common::AtomicRmwOp::And,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
|
||||
"atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I32,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"418B014989C34D89D3F0450FB1190F85EFFFFFFF",
|
||||
"atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Umin, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I32,
|
||||
op: inst_common::AtomicRmwOp::Umin,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"418B014989C34539DA4D0F46DAF0450FB1190F85EBFFFFFF",
|
||||
"atomically { 32_bits_at_[%r9]) Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, },
|
||||
Inst::AtomicRmwSeq {
|
||||
ty: types::I64,
|
||||
op: inst_common::AtomicRmwOp::Add,
|
||||
address: r9,
|
||||
operand: r10,
|
||||
temp: w_r11,
|
||||
dst_old: w_rax
|
||||
},
|
||||
"498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
|
||||
"atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
|
||||
));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
890
cranelift/codegen/src/isa/x64/lower.isle
Normal file
890
cranelift/codegen/src/isa/x64/lower.isle
Normal file
@@ -0,0 +1,890 @@
|
||||
;; x86-64 instruction selection and CLIF-to-MachInst lowering.
|
||||
|
||||
;; The main lowering constructor term: takes a clif `Inst` and returns the
|
||||
;; register(s) within which the lowered instruction's result values live.
|
||||
(decl lower (Inst) ValueRegs)
|
||||
|
||||
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iconst (u64_from_imm64 x))))
|
||||
(value_reg (imm ty x)))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128
|
||||
(iconst (u64_from_imm64 x))))
|
||||
(value_regs (imm $I64 x)
|
||||
(imm $I64 0)))
|
||||
|
||||
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `b64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bconst $false)))
|
||||
(value_reg (imm ty 0)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bconst $true)))
|
||||
(value_reg (imm ty 1)))
|
||||
|
||||
;; `b128`
|
||||
|
||||
(rule (lower (has_type $B128
|
||||
(bconst $false)))
|
||||
(value_regs (imm $B64 0)
|
||||
(imm $B64 0)))
|
||||
|
||||
(rule (lower (has_type $B128
|
||||
(bconst $true)))
|
||||
(value_regs (imm $B64 1)
|
||||
(imm $B64 0)))
|
||||
|
||||
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type ty (null)))
|
||||
(value_reg (imm ty 0)))
|
||||
|
||||
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Add two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Add a register and an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x (simm32_from_value y))))
|
||||
(value_reg (add ty (put_in_reg x) y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd (simm32_from_value x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Add a register and memory.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd x (sinkable_load y))))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd (sinkable_load x) y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(iadd x y)))
|
||||
(value_reg (paddb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(iadd x y)))
|
||||
(value_reg (paddw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(iadd x y)))
|
||||
(value_reg (paddd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(iadd x y)))
|
||||
(value_reg (paddq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (iadd x y)))
|
||||
;; Get the high/low registers for `x`.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1)))
|
||||
;; Get the high/low registers for `y`.
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
;; Do an add followed by an add-with-carry.
|
||||
(with_flags (add_with_flags $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(adc $I64 x_hi (RegMemImm.Reg y_hi))))))
|
||||
|
||||
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Add two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Add a register and an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x (simm32_from_value y))))
|
||||
(value_reg (add ty (put_in_reg x) y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout (simm32_from_value x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Add a register and memory.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout x (sinkable_load y))))
|
||||
(value_reg (add ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(iadd_ifcout (sinkable_load x) y)))
|
||||
(value_reg (add ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; (No `iadd_ifcout` for `i128`.)
|
||||
|
||||
;;;; Rules for `iadd_imm` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; When the immediate fits in a `RegMemImm.Imm`, use that.
|
||||
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (simm32_from_imm64 x) y)))
|
||||
(value_reg (add ty (put_in_reg y) x)))
|
||||
|
||||
;; Otherwise, put the immediate into a register.
|
||||
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (u64_from_imm64 x) y)))
|
||||
(value_reg (add ty (put_in_reg y) (RegMemImm.Reg (imm ty x)))))
|
||||
|
||||
;; `i128`
|
||||
|
||||
;; When the immediate fits in a `RegMemImm.Imm`, use that.
|
||||
(rule (lower (has_type $I128 (iadd_imm (simm32_from_imm64 x) y)))
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
(with_flags (add_with_flags $I64 y_lo x)
|
||||
(adc $I64 y_hi (RegMemImm.Imm 0)))))
|
||||
|
||||
;; Otherwise, put the immediate into a register.
|
||||
(rule (lower (has_type $I128 (iadd_imm (u64_from_imm64 x) y)))
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1))
|
||||
(x_lo Reg (imm $I64 x)))
|
||||
(with_flags (add_with_flags $I64 y_lo (RegMemImm.Reg x_lo))
|
||||
(adc $I64 y_hi (RegMemImm.Imm 0)))))
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Sub two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x y)))
|
||||
(value_reg (sub ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Sub a register and an immediate.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x (simm32_from_value y))))
|
||||
(value_reg (sub ty (put_in_reg x) y)))
|
||||
|
||||
;; Sub a register and memory.
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(isub x (sinkable_load y))))
|
||||
(value_reg (sub ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(isub x y)))
|
||||
(value_reg (psubb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(isub x y)))
|
||||
(value_reg (psubw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(isub x y)))
|
||||
(value_reg (psubd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(isub x y)))
|
||||
(value_reg (psubq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (isub x y)))
|
||||
;; Get the high/low registers for `x`.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1)))
|
||||
;; Get the high/low registers for `y`.
|
||||
(let ((y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
;; Do a sub followed by a sub-with-borrow.
|
||||
(with_flags (sub_with_flags $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(sbb $I64 x_hi (RegMemImm.Reg y_hi))))))
|
||||
|
||||
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; And two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (band x y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; And with a memory operand.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band x (sinkable_load y))))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band (sinkable_load x) y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; And with an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band x (simm32_from_value y))))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg x)
|
||||
y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(band (simm32_from_value x) y)))
|
||||
(value_reg (m_and ty
|
||||
(put_in_reg y)
|
||||
x)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $F32X4 (band x y)))
|
||||
(value_reg (andps (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type $F64X2 (band x y)))
|
||||
(value_reg (andpd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane _bits _lanes)
|
||||
(band x y)))
|
||||
(value_reg (pand (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
(rule (lower (has_type $I128 (band x y)))
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
(value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(m_and $I64 x_hi (RegMemImm.Reg y_hi)))))
|
||||
|
||||
(rule (lower (has_type $B128 (band x y)))
|
||||
;; Booleans are always `0` or `1`, so we only need to do the `and` on the
|
||||
;; low half. The high half is always zero but, rather than generate a new
|
||||
;; zero, we just reuse `x`'s high half which is already zero.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_lo Reg (lo_reg y)))
|
||||
(value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
x_hi)))
|
||||
|
||||
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Or two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (bor x y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Or with a memory operand.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor x (sinkable_load y))))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor (sinkable_load x) y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; Or with an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor x (simm32_from_value y))))
|
||||
(value_reg (or ty
|
||||
(put_in_reg x)
|
||||
y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bor (simm32_from_value x) y)))
|
||||
(value_reg (or ty
|
||||
(put_in_reg y)
|
||||
x)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $F32X4 (bor x y)))
|
||||
(value_reg (orps (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type $F64X2 (bor x y)))
|
||||
(value_reg (orpd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane _bits _lanes)
|
||||
(bor x y)))
|
||||
(value_reg (por (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
(decl or_i128 (ValueRegs ValueRegs) ValueRegs)
|
||||
(rule (or_i128 x y)
|
||||
(let ((x_lo Reg (value_regs_get x 0))
|
||||
(x_hi Reg (value_regs_get x 1))
|
||||
(y_lo Reg (value_regs_get y 0))
|
||||
(y_hi Reg (value_regs_get y 1)))
|
||||
(value_regs (or $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(or $I64 x_hi (RegMemImm.Reg y_hi)))))
|
||||
|
||||
(rule (lower (has_type $I128 (bor x y)))
|
||||
(or_i128 (put_in_regs x) (put_in_regs y)))
|
||||
|
||||
(rule (lower (has_type $B128 (bor x y)))
|
||||
;; Booleans are always `0` or `1`, so we only need to do the `or` on the
|
||||
;; low half. The high half is always zero but, rather than generate a new
|
||||
;; zero, we just reuse `x`'s high half which is already zero.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_lo Reg (lo_reg y)))
|
||||
(value_regs (or $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
x_hi)))
|
||||
|
||||
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Xor two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (bxor x y)))
|
||||
(value_reg (xor ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Xor with a memory operand.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bxor x (sinkable_load y))))
|
||||
(value_reg (xor ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bxor (sinkable_load x) y)))
|
||||
(value_reg (xor ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; Xor with an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bxor x (simm32_from_value y))))
|
||||
(value_reg (xor ty
|
||||
(put_in_reg x)
|
||||
y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(bxor (simm32_from_value x) y)))
|
||||
(value_reg (xor ty
|
||||
(put_in_reg y)
|
||||
x)))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $F32X4 (bxor x y)))
|
||||
(value_reg (xorps (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type $F64X2 (bxor x y)))
|
||||
(value_reg (xorpd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane _bits _lanes)
|
||||
(bxor x y)))
|
||||
(value_reg (pxor (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
(rule (lower (has_type $I128 (bxor x y)))
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_regs ValueRegs (put_in_regs y))
|
||||
(y_lo Reg (value_regs_get y_regs 0))
|
||||
(y_hi Reg (value_regs_get y_regs 1)))
|
||||
(value_regs (xor $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
(xor $I64 x_hi (RegMemImm.Reg y_hi)))))
|
||||
|
||||
(rule (lower (has_type $B128 (bxor x y)))
|
||||
;; Booleans are always `0` or `1`, so we only need to do the `xor` on the
|
||||
;; low half. The high half is always zero but, rather than generate a new
|
||||
;; zero, we just reuse `x`'s high half which is already zero.
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1))
|
||||
(y_lo Reg (lo_reg y)))
|
||||
(value_regs (xor $I64 x_lo (RegMemImm.Reg y_lo))
|
||||
x_hi)))
|
||||
|
||||
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (ishl src amt)))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||
;; amount to the value's bit width.
|
||||
(let ((amt_ Reg (lo_reg amt)))
|
||||
(value_reg (shl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (ishl src (imm8_from_value amt))))
|
||||
(value_reg (shl ty (put_in_reg src) amt)))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(decl shl_i128 (ValueRegs Reg) ValueRegs)
|
||||
(rule (shl_i128 src amt)
|
||||
;; Unpack the registers that make up the 128-bit value being shifted.
|
||||
(let ((src_lo Reg (value_regs_get src 0))
|
||||
(src_hi Reg (value_regs_get src 1))
|
||||
;; Do two 64-bit shifts.
|
||||
(lo_shifted Reg (shl $I64 src_lo (Imm8Reg.Reg amt)))
|
||||
(hi_shifted Reg (shl $I64 src_hi (Imm8Reg.Reg amt)))
|
||||
;; `src_lo >> (64 - amt)` are the bits to carry over from the lo
|
||||
;; into the hi.
|
||||
(carry Reg (shr $I64 src_lo (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt)))))
|
||||
(zero Reg (imm $I64 0))
|
||||
;; Nullify the carry if we are shifting by a multiple of 128.
|
||||
(carry_ Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg zero) carry)))
|
||||
;; Add the carry into the high half.
|
||||
(hi_shifted_ Reg (or $I64 carry_ (RegMemImm.Reg hi_shifted))))
|
||||
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
||||
;; (modulo 128), then the low bits are zero and the high bits are our
|
||||
;; low bits.
|
||||
(with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg lo_shifted) zero)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg hi_shifted_) lo_shifted))))
|
||||
|
||||
(rule (lower (has_type $I128 (ishl src amt)))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||
;; amount to the value's bit width.
|
||||
(let ((amt_ Reg (lo_reg amt)))
|
||||
(shl_i128 (put_in_regs src) amt_)))
|
||||
|
||||
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (ushr src amt)))
|
||||
(let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero)))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the
|
||||
;; shift amount to the value's bit width.
|
||||
(amt_ Reg (lo_reg amt)))
|
||||
(value_reg (shr ty src_ (Imm8Reg.Reg amt_)))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (ushr src (imm8_from_value amt))))
|
||||
(let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero))))
|
||||
(value_reg (shr ty src_ amt))))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(decl shr_i128 (ValueRegs Reg) ValueRegs)
|
||||
(rule (shr_i128 src amt)
|
||||
;; Unpack the lo/hi halves of `src`.
|
||||
(let ((src_lo Reg (value_regs_get src 0))
|
||||
(src_hi Reg (value_regs_get src 1))
|
||||
;; Do a shift on each half.
|
||||
(lo_shifted Reg (shr $I64 src_lo (Imm8Reg.Reg amt)))
|
||||
(hi_shifted Reg (shr $I64 src_hi (Imm8Reg.Reg amt)))
|
||||
;; `src_hi << (64 - amt)` are the bits to carry over from the hi
|
||||
;; into the lo.
|
||||
(carry Reg (shl $I64 src_hi (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg amt)))))
|
||||
;; Nullify the carry if we are shifting by a multiple of 128.
|
||||
(carry_ Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) amt)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg (imm $I64 0)) carry)))
|
||||
;; Add the carry bits into the lo.
|
||||
(lo_shifted_ Reg (or $I64 carry_ (RegMemImm.Reg lo_shifted))))
|
||||
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
||||
;; (modulo 128), then the hi bits are zero and the lo bits are what
|
||||
;; would otherwise be our hi bits.
|
||||
(with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) amt)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg lo_shifted_) hi_shifted)
|
||||
(cmove $I64 (CC.Z) (RegMem.Reg hi_shifted) (imm $I64 0)))))
|
||||
|
||||
(rule (lower (has_type $I128 (ushr src amt)))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||
;; amount to the value's bit width.
|
||||
(let ((amt_ Reg (lo_reg amt)))
|
||||
(shr_i128 (put_in_regs src) amt_)))
|
||||
|
||||
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the
|
||||
;; shift amount to the value's bit width.
|
||||
(let ((amt_ Reg (lo_reg amt)))
|
||||
(value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (rotl src (imm8_from_value amt))))
|
||||
(value_reg (m_rotl ty (put_in_reg src) amt)))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(rule (lower (has_type $I128 (rotl src amt)))
|
||||
(let ((src_ ValueRegs (put_in_regs src))
|
||||
;; NB: Only the low bits of `amt` matter since we logically mask the
|
||||
;; rotation amount to the value's bit width.
|
||||
(amt_ Reg (lo_reg amt)))
|
||||
(or_i128 (shl_i128 src_ amt_)
|
||||
(shr_i128 src_ (sub $I64 (imm $I64 128) (RegMemImm.Reg amt_))))))
|
||||
|
||||
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(avg_round x y)))
|
||||
(value_reg (pavgb (put_in_reg x) (put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(avg_round x y)))
|
||||
(value_reg (pavgw (put_in_reg x) (put_in_reg_mem y))))
|
||||
|
||||
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
;; Multiply two registers.
|
||||
(rule (lower (has_type (fits_in_64 ty) (imul x y)))
|
||||
(value_reg (mul ty
|
||||
(put_in_reg x)
|
||||
(RegMemImm.Reg (put_in_reg y)))))
|
||||
|
||||
;; Multiply a register and an immediate.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(imul x (simm32_from_value y))))
|
||||
(value_reg (mul ty (put_in_reg x) y)))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(imul (simm32_from_value x) y)))
|
||||
(value_reg (mul ty (put_in_reg y) x)))
|
||||
|
||||
;; Multiply a register and a memory load.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(imul x (sinkable_load y))))
|
||||
(value_reg (mul ty
|
||||
(put_in_reg x)
|
||||
(sink_load y))))
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty)
|
||||
(imul (sinkable_load x) y)))
|
||||
(value_reg (mul ty
|
||||
(put_in_reg y)
|
||||
(sink_load x))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
;; (No i8x16 multiply.)
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8) (imul x y)))
|
||||
(value_reg (pmullw (put_in_reg x) (put_in_reg_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4) (imul x y)))
|
||||
(value_reg (pmulld (put_in_reg x) (put_in_reg_mem y))))
|
||||
|
||||
;; With AVX-512 we can implement `i64x2` multiplication with a single
|
||||
;; instruction.
|
||||
(rule (lower (has_type (and (avx512vl_enabled)
|
||||
(avx512dq_enabled)
|
||||
(multi_lane 64 2))
|
||||
(imul x y)))
|
||||
(value_reg (vpmullq (put_in_reg_mem x) (put_in_reg y))))
|
||||
|
||||
;; Otherwise, for i64x2 multiplication we describe a lane A as being composed of
|
||||
;; a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand
|
||||
;; multiplication can then be written as:
|
||||
;;
|
||||
;;                       Ah Al
|
||||
;;                     * Bh Bl
|
||||
;;                       -----
|
||||
;;                       Al * Bl
|
||||
;;              + (Ah * Bl) << 32
|
||||
;;              + (Al * Bh) << 32
|
||||
;;
|
||||
;; So for each lane we will compute:
|
||||
;;
|
||||
;; A * B = (Al * Bl) + (((Ah * Bl) + (Al * Bh)) << 32)
|
||||
;;
|
||||
;; Note, the algorithm will use `pmuludq` which operates directly on the lower
|
||||
;; 32 bits (`Al` or `Bl`) of a lane and writes the result to the full 64 bits of
|
||||
;; the lane of the destination. For this reason we don't need shifts to isolate
|
||||
;; the lower 32-bits, however, we will need to use shifts to isolate the high
|
||||
;; 32-bits when doing calculations, i.e., `Ah == A >> 32`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul a b)))
|
||||
(let ((a0 Reg (put_in_reg a))
|
||||
(b0 Reg (put_in_reg b))
|
||||
;; a_hi = A >> 32
|
||||
(a_hi Reg (psrlq a0 (RegMemImm.Imm 32)))
|
||||
;; ah_bl = Ah * Bl
|
||||
(ah_bl Reg (pmuludq a_hi (RegMem.Reg b0)))
|
||||
;; b_hi = B >> 32
|
||||
(b_hi Reg (psrlq b0 (RegMemImm.Imm 32)))
|
||||
;; al_bh = Al * Bh
|
||||
(al_bh Reg (pmuludq a0 (RegMem.Reg b_hi)))
|
||||
;; aa_bb = ah_bl + al_bh
|
||||
(aa_bb Reg (paddq ah_bl (RegMem.Reg al_bh)))
|
||||
;; aa_bb_shifted = aa_bb << 32
|
||||
(aa_bb_shifted Reg (psllq aa_bb (RegMemImm.Imm 32)))
|
||||
;; al_bl = Al * Bl
|
||||
(al_bl Reg (pmuludq a0 (RegMem.Reg b0))))
|
||||
;; al_bl + aa_bb_shifted
|
||||
(value_reg (paddq al_bl (RegMem.Reg aa_bb_shifted)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_s`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(imul (def_inst (swiden_high (and (value_type (multi_lane 8 16))
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x1 Reg (put_in_reg x))
|
||||
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
|
||||
(x3 Reg (pmovsxbw (RegMem.Reg x2)))
|
||||
(y1 Reg (put_in_reg y))
|
||||
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
|
||||
(y3 Reg (pmovsxbw (RegMem.Reg y2))))
|
||||
(value_reg (pmullw x3 (RegMem.Reg y3)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_high_i16x8_s`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(imul (def_inst (swiden_high (and (value_type (multi_lane 16 8))
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_high_i32x4_s`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul (def_inst (swiden_high (and (value_type (multi_lane 32 4))
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
0xFA
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
0xFA
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(imul (def_inst (swiden_low (and (value_type (multi_lane 8 16))
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x2 Reg (pmovsxbw (put_in_reg_mem x)))
|
||||
(y2 Reg (pmovsxbw (put_in_reg_mem y))))
|
||||
(value_reg (pmullw x2 (RegMem.Reg y2)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_low_i16x8_s`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(imul (def_inst (swiden_low (and (value_type (multi_lane 16 8))
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_low_i32x4_s`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul (def_inst (swiden_low (and (value_type (multi_lane 32 4))
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
0x50
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
0x50
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(imul (def_inst (uwiden_high (and (value_type (multi_lane 8 16))
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x1 Reg (put_in_reg x))
|
||||
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
|
||||
(x3 Reg (pmovzxbw (RegMem.Reg x2)))
|
||||
(y1 Reg (put_in_reg y))
|
||||
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
|
||||
(y3 Reg (pmovzxbw (RegMem.Reg y2))))
|
||||
(value_reg (pmullw x3 (RegMem.Reg y3)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_high_i16x8_u`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(imul (def_inst (uwiden_high (and (value_type (multi_lane 16 8))
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_high_i32x4_u`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul (def_inst (uwiden_high (and (value_type (multi_lane 32 4))
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
0xFA
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
0xFA
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(imul (def_inst (uwiden_low (and (value_type (multi_lane 8 16))
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x2 Reg (pmovzxbw (put_in_reg_mem x)))
|
||||
(y2 Reg (pmovzxbw (put_in_reg_mem y))))
|
||||
(value_reg (pmullw x2 (RegMem.Reg y2)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_low_i16x8_u`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(imul (def_inst (uwiden_low (and (value_type (multi_lane 16 8))
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_low_i32x4_u`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul (def_inst (uwiden_low (and (value_type (multi_lane 32 4))
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
0x50
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
0x50
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
|
||||
@@ -1,5 +1,8 @@
|
||||
//! Lowering rules for X64.
|
||||
|
||||
// ISLE integration glue.
|
||||
mod isle;
|
||||
|
||||
use crate::data_value::DataValue;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, FloatCC, IntCC},
|
||||
@@ -1497,20 +1500,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
None
|
||||
};
|
||||
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
|
||||
let value = ctx
|
||||
.get_constant(insn)
|
||||
.expect("constant value for iconst et al");
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
for inst in Inst::gen_constant(dst, value as u128, ty.unwrap(), |ty| {
|
||||
ctx.alloc_tmp(ty).only_reg().unwrap()
|
||||
}) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
if let Ok(()) = isle::lower(ctx, isa_flags, &outputs, insn) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Opcode::Iadd
|
||||
match op {
|
||||
Opcode::Iconst
|
||||
| Opcode::Bconst
|
||||
| Opcode::Null
|
||||
| Opcode::Iadd
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::SaddSat
|
||||
| Opcode::UaddSat
|
||||
@@ -1521,149 +1519,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::Band
|
||||
| Opcode::Bor
|
||||
| Opcode::Bxor => {
|
||||
let ty = ty.unwrap();
|
||||
if ty.lane_count() > 1 {
|
||||
let sse_op = match op {
|
||||
Opcode::Iadd => match ty {
|
||||
types::I8X16 => SseOpcode::Paddb,
|
||||
types::I16X8 => SseOpcode::Paddw,
|
||||
types::I32X4 => SseOpcode::Paddd,
|
||||
types::I64X2 => SseOpcode::Paddq,
|
||||
_ => panic!("Unsupported type for packed iadd instruction: {}", ty),
|
||||
},
|
||||
Opcode::SaddSat => match ty {
|
||||
types::I8X16 => SseOpcode::Paddsb,
|
||||
types::I16X8 => SseOpcode::Paddsw,
|
||||
_ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::UaddSat => match ty {
|
||||
types::I8X16 => SseOpcode::Paddusb,
|
||||
types::I16X8 => SseOpcode::Paddusw,
|
||||
_ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::Isub => match ty {
|
||||
types::I8X16 => SseOpcode::Psubb,
|
||||
types::I16X8 => SseOpcode::Psubw,
|
||||
types::I32X4 => SseOpcode::Psubd,
|
||||
types::I64X2 => SseOpcode::Psubq,
|
||||
_ => panic!("Unsupported type for packed isub instruction: {}", ty),
|
||||
},
|
||||
Opcode::SsubSat => match ty {
|
||||
types::I8X16 => SseOpcode::Psubsb,
|
||||
types::I16X8 => SseOpcode::Psubsw,
|
||||
_ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::UsubSat => match ty {
|
||||
types::I8X16 => SseOpcode::Psubusb,
|
||||
types::I16X8 => SseOpcode::Psubusw,
|
||||
_ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
|
||||
},
|
||||
Opcode::AvgRound => match ty {
|
||||
types::I8X16 => SseOpcode::Pavgb,
|
||||
types::I16X8 => SseOpcode::Pavgw,
|
||||
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
|
||||
},
|
||||
Opcode::Band => match ty {
|
||||
types::F32X4 => SseOpcode::Andps,
|
||||
types::F64X2 => SseOpcode::Andpd,
|
||||
_ => SseOpcode::Pand,
|
||||
},
|
||||
Opcode::Bor => match ty {
|
||||
types::F32X4 => SseOpcode::Orps,
|
||||
types::F64X2 => SseOpcode::Orpd,
|
||||
_ => SseOpcode::Por,
|
||||
},
|
||||
Opcode::Bxor => match ty {
|
||||
types::F32X4 => SseOpcode::Xorps,
|
||||
types::F64X2 => SseOpcode::Xorpd,
|
||||
_ => SseOpcode::Pxor,
|
||||
},
|
||||
_ => panic!("Unsupported packed instruction: {}", op),
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
// Move the `lhs` to the same register as `dst`.
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
|
||||
} else if ty == types::I128 || ty == types::B128 {
|
||||
let alu_ops = match op {
|
||||
Opcode::Iadd => (AluRmiROpcode::Add, AluRmiROpcode::Adc),
|
||||
Opcode::Isub => (AluRmiROpcode::Sub, AluRmiROpcode::Sbb),
|
||||
Opcode::Band => (AluRmiROpcode::And, AluRmiROpcode::And),
|
||||
Opcode::Bor => (AluRmiROpcode::Or, AluRmiROpcode::Or),
|
||||
Opcode::Bxor => (AluRmiROpcode::Xor, AluRmiROpcode::Xor),
|
||||
_ => panic!("Unsupported opcode with 128-bit integers: {:?}", op),
|
||||
};
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
assert_eq!(lhs.len(), 2);
|
||||
assert_eq!(rhs.len(), 2);
|
||||
assert_eq!(dst.len(), 2);
|
||||
|
||||
// For add, sub, and, or, xor: just do ops on lower then upper
|
||||
// half. Carry-flag propagation is implicit (add/adc, sub/sbb).
|
||||
ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64));
|
||||
ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[1], types::I64));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
alu_ops.0,
|
||||
RegMemImm::reg(rhs.regs()[0]),
|
||||
dst.regs()[0],
|
||||
));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
alu_ops.1,
|
||||
RegMemImm::reg(rhs.regs()[1]),
|
||||
dst.regs()[1],
|
||||
));
|
||||
} else {
|
||||
let size = if ty == types::I64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
};
|
||||
let alu_op = match op {
|
||||
Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add,
|
||||
Opcode::Isub => AluRmiROpcode::Sub,
|
||||
Opcode::Band => AluRmiROpcode::And,
|
||||
Opcode::Bor => AluRmiROpcode::Or,
|
||||
Opcode::Bxor => AluRmiROpcode::Xor,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let (lhs, rhs) = match op {
|
||||
Opcode::Iadd
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::Band
|
||||
| Opcode::Bor
|
||||
| Opcode::Bxor => {
|
||||
// For commutative operations, try to commute operands if one is an
|
||||
// immediate or direct memory reference. Do so by converting LHS to RMI; if
|
||||
// reg, then always convert RHS to RMI; else, use LHS as RMI and convert
|
||||
// RHS to reg.
|
||||
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
|
||||
if let RegMemImm::Reg { reg: lhs_reg } = lhs {
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
(lhs_reg, rhs)
|
||||
} else {
|
||||
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
|
||||
(rhs_reg, lhs)
|
||||
}
|
||||
}
|
||||
Opcode::Isub => (
|
||||
put_input_in_reg(ctx, inputs[0]),
|
||||
input_to_reg_mem_imm(ctx, inputs[1]),
|
||||
),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
|
||||
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
|
||||
}
|
||||
unreachable!(
|
||||
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||
ctx.dfg().display_inst(insn),
|
||||
ty
|
||||
);
|
||||
}
|
||||
|
||||
Opcode::Imul => {
|
||||
@@ -1681,469 +1541,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::UwidenLow,
|
||||
],
|
||||
) {
|
||||
// Optimized ext_mul_* lowerings are based on optimized lowerings
|
||||
// here: https://github.com/WebAssembly/simd/pull/376
|
||||
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
|
||||
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_s
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
));
|
||||
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note swiden_high only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) {
|
||||
if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(swiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(swiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovsxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_s
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_s
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuldq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note swiden_low only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_signed type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh)
|
||||
{
|
||||
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_high,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
let input0_ty = ctx.input_ty(uwiden0_high, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_high, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_high_i8x16_u
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
Writable::from_reg(lhs),
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Palignr,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
8,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_high_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpckhwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_high_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0xFA,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note uwiden_high only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_high_unsigned type"),
|
||||
}
|
||||
}
|
||||
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
|
||||
if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden0_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden1_low,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
|
||||
let input0_ty = ctx.input_ty(uwiden0_low, 0);
|
||||
let input1_ty = ctx.input_ty(uwiden1_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
match (input0_ty, input1_ty, output_ty) {
|
||||
(types::I8X16, types::I8X16, types::I16X8) => {
|
||||
// i16x8.extmul_low_i8x16_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Pmovzxbw,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmullw,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I16X8, types::I32X4) => {
|
||||
// i32x4.extmul_low_i16x8_u
|
||||
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
|
||||
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmulhuw,
|
||||
RegMem::reg(rhs),
|
||||
tmp_reg,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Punpcklwd,
|
||||
RegMem::from(tmp_reg),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I32X4, types::I32X4, types::I64X2) => {
|
||||
// i64x2.extmul_low_i32x4_u
|
||||
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(lhs),
|
||||
tmp_reg,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r_imm(
|
||||
SseOpcode::Pshufd,
|
||||
RegMem::reg(rhs),
|
||||
dst,
|
||||
0x50,
|
||||
OperandSize::Size32,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(tmp_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
// Note uwiden_low only allows types: I8X16, I16X8, and I32X4
|
||||
_ => panic!("Unsupported extmul_low_unsigned type"),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
panic!("Unsupported imul operation for type: {}", ty);
|
||||
}
|
||||
unreachable!("implemented in ISLE: {}", ctx.dfg().display_inst(insn));
|
||||
} else if ty == types::I64X2 {
|
||||
// Eventually one of these should be `input_to_reg_mem` (TODO).
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512dq_simd() {
|
||||
// With the right AVX512 features (VL + DQ) this operation
|
||||
// can lower to a single operation.
|
||||
ctx.emit(Inst::xmm_rm_r_evex(
|
||||
Avx512Opcode::Vpmullq,
|
||||
RegMem::reg(rhs),
|
||||
lhs,
|
||||
dst,
|
||||
));
|
||||
} else {
|
||||
// Otherwise, for I64X2 multiplication we describe a lane A as being
|
||||
// composed of a 32-bit upper half "Ah" and a 32-bit lower half
|
||||
// "Al". The 32-bit long hand multiplication can then be written
|
||||
// as:
|
||||
// Ah Al
|
||||
// * Bh Bl
|
||||
// -----
|
||||
// Al * Bl
|
||||
// + (Ah * Bl) << 32
|
||||
// + (Al * Bh) << 32
|
||||
//
|
||||
// So for each lane we will compute:
|
||||
// A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
|
||||
//
|
||||
// Note, the algorithm will use pmuludq which operates directly
|
||||
// on the lower 32-bit (Al or Bl) of a lane and writes the
|
||||
// result to the full 64-bits of the lane of the destination.
|
||||
// For this reason we don't need shifts to isolate the lower
|
||||
// 32-bits, however, we will need to use shifts to isolate the
|
||||
// high 32-bits when doing calculations, i.e., Ah == A >> 32.
|
||||
//
|
||||
// The full sequence then is as follows:
|
||||
// A' = A
|
||||
// A' = A' >> 32
|
||||
// A' = Ah' * Bl
|
||||
// B' = B
|
||||
// B' = B' >> 32
|
||||
// B' = Bh' * Al
|
||||
// B' = B' + A'
|
||||
// B' = B' << 32
|
||||
// A' = A
|
||||
// A' = Al' * Bl
|
||||
// A' = A' + B'
|
||||
// dst = A'
|
||||
|
||||
// A' = A
|
||||
let rhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
|
||||
|
||||
// A' = A' >> 32
|
||||
// A' = Ah' * Bl
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psrlq,
|
||||
RegMemImm::imm(32),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(lhs.clone()),
|
||||
rhs_1,
|
||||
));
|
||||
|
||||
// B' = B
|
||||
let lhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
|
||||
ctx.emit(Inst::gen_move(lhs_1, lhs, ty));
|
||||
|
||||
// B' = B' >> 32
|
||||
// B' = Bh' * Al
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psrlq,
|
||||
RegMemImm::imm(32),
|
||||
lhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1));
|
||||
|
||||
// B' = B' + A'
|
||||
// B' = B' << 32
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddq,
|
||||
RegMem::reg(rhs_1.to_reg()),
|
||||
lhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rmi_reg(
|
||||
SseOpcode::Psllq,
|
||||
RegMemImm::imm(32),
|
||||
lhs_1,
|
||||
));
|
||||
|
||||
// A' = A
|
||||
// A' = Al' * Bl
|
||||
// A' = A' + B'
|
||||
// dst = A'
|
||||
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmuludq,
|
||||
RegMem::reg(lhs.clone()),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddq,
|
||||
RegMem::reg(lhs_1.to_reg()),
|
||||
rhs_1,
|
||||
));
|
||||
ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
|
||||
}
|
||||
unreachable!("implemented in ISLE: {}", ctx.dfg().display_inst(insn));
|
||||
} else if ty.lane_count() > 1 {
|
||||
// Emit single instruction lowerings for the remaining vector
|
||||
// multiplications.
|
||||
@@ -2228,29 +1628,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
dst.regs()[1],
|
||||
));
|
||||
} else {
|
||||
let size = if ty == types::I64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
};
|
||||
let alu_op = AluRmiROpcode::Mul;
|
||||
|
||||
// For commutative operations, try to commute operands if one is
|
||||
// an immediate or direct memory reference. Do so by converting
|
||||
// LHS to RMI; if reg, then always convert RHS to RMI; else, use
|
||||
// LHS as RMI and convert RHS to reg.
|
||||
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
|
||||
let (lhs, rhs) = if let RegMemImm::Reg { reg: lhs_reg } = lhs {
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
(lhs_reg, rhs)
|
||||
} else {
|
||||
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
|
||||
(rhs_reg, lhs)
|
||||
};
|
||||
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
|
||||
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
|
||||
unreachable!("implemented in ISLE")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5801,7 +5179,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
// Now the AtomicRmwSeq (pseudo-) instruction itself
|
||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||
ctx.emit(Inst::AtomicRmwSeq { ty: ty_access, op });
|
||||
ctx.emit(Inst::AtomicRmwSeq {
|
||||
ty: ty_access,
|
||||
op,
|
||||
address: regs::r9(),
|
||||
operand: regs::r10(),
|
||||
temp: Writable::from_reg(regs::r11()),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
});
|
||||
|
||||
// And finally, copy the preordained AtomicRmwSeq output reg to its destination.
|
||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
||||
@@ -5827,8 +5212,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
));
|
||||
ctx.emit(Inst::LockCmpxchg {
|
||||
ty: ty_access,
|
||||
src: replacement,
|
||||
dst: addr.into(),
|
||||
mem: addr.into(),
|
||||
replacement,
|
||||
expected: regs::rax(),
|
||||
dst_old: Writable::from_reg(regs::rax()),
|
||||
});
|
||||
// And finally, copy the old value at the location to its destination reg.
|
||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
||||
|
||||
428
cranelift/codegen/src/isa/x64/lower/isle.rs
Normal file
428
cranelift/codegen/src/isa/x64/lower/isle.rs
Normal file
@@ -0,0 +1,428 @@
|
||||
//! ISLE integration glue code for x64 lowering.
|
||||
|
||||
// Pull in the ISLE generated code.
|
||||
mod generated_code;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{
|
||||
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
|
||||
Writable,
|
||||
};
|
||||
use crate::isa::x64::settings as x64_settings;
|
||||
use crate::{
|
||||
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
|
||||
isa::x64::inst::{
|
||||
args::{
|
||||
Amode, Avx512Opcode, CmpOpcode, ExtMode, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC,
|
||||
},
|
||||
x64_map_regs, RegMapper,
|
||||
},
|
||||
machinst::{get_output_reg, InsnInput, InsnOutput, LowerCtx},
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
// Single-identifier aliases for the Rust types used in the `Context` method
// signatures below.
// NOTE(review): presumably these names must match the extern type names
// declared in the ISLE sources — confirm against `generated_code`.
type Unit = ();
|
||||
type ValueSlice<'a> = &'a [Value];
|
||||
type ValueArray2 = [Value; 2];
|
||||
type ValueArray3 = [Value; 3];
|
||||
type WritableReg = Writable<Reg>;
|
||||
type ValueRegs = crate::machinst::ValueRegs<Reg>;
|
||||
|
||||
/// Record of a load instruction together with its address input and offset.
///
/// NOTE(review): presumably describes a load that may be sunk into the
/// instruction that uses it; nothing in this part of the file constructs or
/// reads it — confirm against the ISLE definitions.
pub struct SinkableLoad {
|
||||
// The CLIF load instruction itself.
inst: Inst,
|
||||
// The instruction input that supplies the load's address.
addr_input: InsnInput,
|
||||
// Offset associated with the address (assumed to be a byte displacement — TODO confirm).
offset: i32,
|
||||
}
|
||||
|
||||
/// A small table of register renamings, applied to emitted instructions
/// through its `RegMapper` implementation.
#[derive(Default)]
|
||||
struct RegRenamer {
|
||||
// Map of `(old, new)` register names. Use a `SmallVec` because we typically
|
||||
// only have one or two renamings.
|
||||
renames: SmallVec<[(Reg, Reg); 2]>,
|
||||
}
|
||||
|
||||
impl RegRenamer {
|
||||
fn add_rename(&mut self, old: Reg, new: Reg) {
|
||||
self.renames.push((old, new));
|
||||
}
|
||||
|
||||
fn get_rename(&self, reg: Reg) -> Option<Reg> {
|
||||
self.renames
|
||||
.iter()
|
||||
.find(|(old, _)| reg == *old)
|
||||
.map(|(_, new)| *new)
|
||||
}
|
||||
}
|
||||
|
||||
/// `RegMapper` implementation that applies the same rename table to every
/// kind of register operand: uses, defs, and mods are all looked up via
/// `get_rename`, and registers without an entry map to `None`.
impl RegMapper for RegRenamer {
|
||||
// Rename lookup for a register that is read.
fn get_use(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
|
||||
// Rename lookup for a register that is written.
fn get_def(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
|
||||
// Rename lookup for a register that is both read and written.
fn get_mod(&self, reg: Reg) -> Option<Reg> {
|
||||
self.get_rename(reg)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// The main entry point for lowering with ISLE.
|
||||
pub(crate) fn lower<C>(
|
||||
lower_ctx: &mut C,
|
||||
isa_flags: &x64_settings::Flags,
|
||||
outputs: &[InsnOutput],
|
||||
inst: Inst,
|
||||
) -> Result<(), ()>
|
||||
where
|
||||
C: LowerCtx<I = MInst>,
|
||||
{
|
||||
// TODO: reuse the ISLE context across lowerings so we can reuse its
|
||||
// internal heap allocations.
|
||||
let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);
|
||||
|
||||
let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
|
||||
let mut temp_regs = temp_regs.regs().iter();
|
||||
|
||||
// The ISLE generated code emits its own registers to define the
|
||||
// instruction's lowered values in. We rename those registers to the
|
||||
// registers they were assigned when their value was used as an operand in
|
||||
// earlier lowerings.
|
||||
let mut renamer = RegRenamer::default();
|
||||
for output in outputs {
|
||||
let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
|
||||
for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
|
||||
renamer.add_rename(*temp, dst.to_reg());
|
||||
}
|
||||
}
|
||||
|
||||
for mut inst in isle_ctx.into_emitted_insts() {
|
||||
x64_map_regs(&mut inst, &renamer);
|
||||
lower_ctx.emit(inst);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// State handed to the ISLE-generated code while lowering one instruction:
/// the underlying lowering context, the x64 ISA flags, and a buffer of
/// emitted machine instructions.
pub struct IsleContext<'a, C> {
|
||||
// The lowering context that the `Context` methods delegate to.
lower_ctx: &'a mut C,
|
||||
// x64-specific settings, consulted by the `avx512*_enabled` checks.
isa_flags: &'a x64_settings::Flags,
|
||||
// Instructions buffered here rather than emitted directly; retrieved with
// `into_emitted_insts`.
emitted_insts: SmallVec<[MInst; 6]>,
|
||||
}
|
||||
|
||||
impl<'a, C> IsleContext<'a, C> {
|
||||
/// Creates a context wrapping `lower_ctx` and `isa_flags` with an empty
/// instruction buffer.
pub fn new(lower_ctx: &'a mut C, isa_flags: &'a x64_settings::Flags) -> Self {
|
||||
IsleContext {
|
||||
lower_ctx,
|
||||
isa_flags,
|
||||
emitted_insts: SmallVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the context and returns the buffered instructions, releasing
/// the borrow of the underlying lowering context.
pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
|
||||
self.emitted_insts
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C> generated_code::Context for IsleContext<'a, C>
|
||||
where
|
||||
C: LowerCtx<I = MInst>,
|
||||
{
|
||||
#[inline]
|
||||
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
|
||||
let [a, b] = *arr;
|
||||
(a, b)
|
||||
}
|
||||
|
||||
/// Builds a two-element value array from `a` and `b`, in that order.
#[inline]
|
||||
fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 {
|
||||
[a, b]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) {
|
||||
let [a, b, c] = *arr;
|
||||
(a, b, c)
|
||||
}
|
||||
|
||||
/// Builds a three-element value array from `a`, `b`, and `c`, in that order.
#[inline]
|
||||
fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 {
|
||||
[a, b, c]
|
||||
}
|
||||
|
||||
/// Wraps a single register as a `ValueRegs`.
#[inline]
|
||||
fn value_reg(&mut self, reg: Reg) -> ValueRegs {
|
||||
ValueRegs::one(reg)
|
||||
}
|
||||
|
||||
/// Wraps a pair of registers as a `ValueRegs`, with `r1` first.
#[inline]
|
||||
fn value_regs(&mut self, r1: Reg, r2: Reg) -> ValueRegs {
|
||||
ValueRegs::two(r1, r2)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn temp_writable_reg(&mut self, ty: Type) -> WritableReg {
|
||||
let value_regs = self.lower_ctx.alloc_tmp(ty);
|
||||
value_regs.only_reg().unwrap()
|
||||
}
|
||||
|
||||
/// Returns the `Reg::invalid()` placeholder register.
#[inline]
|
||||
fn invalid_reg(&mut self) -> Reg {
|
||||
Reg::invalid()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn put_in_reg(&mut self, val: Value) -> Reg {
|
||||
self.lower_ctx.put_value_in_regs(val).only_reg().unwrap()
|
||||
}
|
||||
|
||||
/// Places `val` in registers and returns all of the registers holding it.
#[inline]
|
||||
fn put_in_regs(&mut self, val: Value) -> ValueRegs {
|
||||
self.lower_ctx.put_value_in_regs(val)
|
||||
}
|
||||
|
||||
/// Returns the `i`th register of `regs`.
///
/// The lookup uses plain indexing, so an out-of-range `i` panics.
#[inline]
|
||||
fn value_regs_get(&mut self, regs: ValueRegs, i: usize) -> Reg {
|
||||
regs.regs()[i]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u8_as_u64(&mut self, x: u8) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u16_as_u64(&mut self, x: u16) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn u32_as_u64(&mut self, x: u32) -> u64 {
|
||||
x.into()
|
||||
}
|
||||
|
||||
/// Returns the width of `ty` in bits, as reported by `Type::bits`.
#[inline]
|
||||
fn ty_bits(&mut self, ty: Type) -> u16 {
|
||||
ty.bits()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
|
||||
if ty.bits() <= 64 {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves `list` against the function's value-list pool and returns its
/// contents as a slice.
#[inline]
|
||||
fn value_list_slice(&mut self, list: ValueList) -> ValueSlice {
|
||||
list.as_slice(&self.lower_ctx.dfg().value_lists)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unwrap_head_value_list_1(&mut self, list: ValueList) -> (Value, ValueSlice) {
|
||||
match self.value_list_slice(list) {
|
||||
[head, tail @ ..] => (*head, tail),
|
||||
_ => out_of_line_panic("`unwrap_head_value_list_1` on empty `ValueList`"),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unwrap_head_value_list_2(&mut self, list: ValueList) -> (Value, Value, ValueSlice) {
|
||||
match self.value_list_slice(list) {
|
||||
[head1, head2, tail @ ..] => (*head1, *head2, tail),
|
||||
_ => out_of_line_panic(
|
||||
"`unwrap_head_value_list_2` on list without at least two elements",
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the plain register underlying the writable register `r`.
#[inline]
|
||||
fn writable_reg_to_reg(&mut self, r: WritableReg) -> Reg {
|
||||
r.to_reg()
|
||||
}
|
||||
|
||||
/// Returns the bits of `imm` cast to `u64`.
///
/// NOTE(review): `Imm64::bits` presumably returns a signed 64-bit value, in
/// which case the `as` cast reinterprets the bit pattern without changing
/// it — confirm.
#[inline]
|
||||
fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
|
||||
imm.bits() as u64
|
||||
}
|
||||
|
||||
/// Returns the result values of `inst`, as recorded in the data-flow graph.
#[inline]
|
||||
fn inst_results(&mut self, inst: Inst) -> ValueSlice {
|
||||
self.lower_ctx.dfg().inst_results(inst)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn first_result(&mut self, inst: Inst) -> Option<Value> {
|
||||
self.lower_ctx.dfg().inst_results(inst).first().copied()
|
||||
}
|
||||
|
||||
/// Returns a clone of the `InstructionData` stored for `inst` in the
/// data-flow graph.
#[inline]
|
||||
fn inst_data(&mut self, inst: Inst) -> InstructionData {
|
||||
self.lower_ctx.dfg()[inst].clone()
|
||||
}
|
||||
|
||||
/// Returns the type of `val`, as recorded in the data-flow graph.
#[inline]
|
||||
fn value_type(&mut self, val: Value) -> Type {
|
||||
self.lower_ctx.dfg().value_type(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn multi_lane(&mut self, ty: Type) -> Option<(u8, u16)> {
|
||||
if ty.lane_count() > 1 {
|
||||
Some((ty.lane_bits(), ty.lane_count()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn def_inst(&mut self, val: Value) -> Option<Inst> {
|
||||
self.lower_ctx.dfg().value_def(val).inst()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn operand_size_of_type(&mut self, ty: Type) -> OperandSize {
|
||||
if ty.bits() == 64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
}
|
||||
}
|
||||
|
||||
fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
|
||||
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
|
||||
if let Some(c) = inputs.constant {
|
||||
// Generate constants fresh at each use to minimize long-range
|
||||
// register pressure.
|
||||
let ty = self.value_type(val);
|
||||
return RegMem::reg(generated_code::constructor_imm(self, ty, c).unwrap());
|
||||
}
|
||||
|
||||
if let Some((src_insn, 0)) = inputs.inst {
|
||||
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) {
|
||||
self.lower_ctx.sink_inst(src_insn);
|
||||
let amode = lower_to_amode(self.lower_ctx, addr_input, offset);
|
||||
return RegMem::mem(amode);
|
||||
}
|
||||
}
|
||||
|
||||
RegMem::reg(self.put_in_reg(val))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512vl_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512dq_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
|
||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||
let constant = self.lower_ctx.get_constant(inst)?;
|
||||
let imm = u8::try_from(constant).ok()?;
|
||||
Some(Imm8Reg::Imm8 { imm })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simm32_from_value(&mut self, val: Value) -> Option<RegMemImm> {
|
||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||
let constant: u64 = self.lower_ctx.get_constant(inst)?;
|
||||
let constant = constant as i64;
|
||||
to_simm32(constant)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<RegMemImm> {
|
||||
to_simm32(imm.bits())
|
||||
}
|
||||
|
||||
fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
|
||||
let input = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if let Some((inst, 0)) = input.inst {
|
||||
if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, inst) {
|
||||
return Some(SinkableLoad {
|
||||
inst,
|
||||
addr_input,
|
||||
offset,
|
||||
});
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn sink_load(&mut self, load: &SinkableLoad) -> RegMemImm {
|
||||
self.lower_ctx.sink_inst(load.inst);
|
||||
|
||||
let flags = self
|
||||
.lower_ctx
|
||||
.memflags(load.inst)
|
||||
.expect("sinkable loads should have memflags");
|
||||
|
||||
let base = self
|
||||
.lower_ctx
|
||||
.put_input_in_regs(load.addr_input.insn, load.addr_input.input)
|
||||
.only_reg()
|
||||
.unwrap();
|
||||
|
||||
RegMemImm::Mem {
|
||||
addr: Amode::imm_reg(load.offset as u32, base)
|
||||
.with_flags(flags)
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
|
||||
ExtMode::new(from_bits, to_bits).unwrap()
|
||||
}
|
||||
|
||||
/// Records `inst` for emission: the instruction is cloned, passed through
/// `mov_mitosis`, and every instruction that yields is appended, in order,
/// to `emitted_insts`.
fn emit(&mut self, inst: &MInst) -> Unit {
    inst.clone()
        .mov_mitosis()
        .into_iter()
        .for_each(|piece| self.emitted_insts.push(piece));
}
|
||||
|
||||
#[inline]
|
||||
fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
|
||||
if x != 0 && x < u64::from(u32::MAX) {
|
||||
Some(x)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_simm32(constant: i64) -> Option<RegMemImm> {
|
||||
if constant == ((constant << 32) >> 32) {
|
||||
Some(RegMemImm::Imm {
|
||||
simm32: constant as u32,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Panics with `msg`.
///
/// Marked `#[cold]` and `#[inline(never)]`, and `#[track_caller]` so the
/// reported panic location is the caller's.
#[track_caller]
#[cold]
#[inline(never)]
fn out_of_line_panic(msg: &str) -> ! {
    panic!("{}", msg)
}
|
||||
3496
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
Normal file
3496
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
Normal file
File diff suppressed because it is too large
Load Diff
22
cranelift/codegen/src/isle.rs
Normal file
22
cranelift/codegen/src/isle.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
// GENERATED BY ISLE. DO NOT EDIT!
|
||||
//
|
||||
// Generated automatically from the instruction-selection DSL code in:
|
||||
// - src/clif.isle
|
||||
|
||||
#![allow(dead_code, unreachable_code, unreachable_patterns)]
|
||||
#![allow(unused_imports, unused_variables, non_snake_case)]
|
||||
|
||||
use super::*; // Pulls in all external types.
|
||||
|
||||
/// Context during lowering: an implementation of this trait
|
||||
/// must be provided with all external constructors and extractors.
|
||||
/// A mutable borrow is passed along through all lowering logic.
|
||||
pub trait Context {
|
||||
fn value_list_slice(&mut self, arg0: &ValueList) -> (ValueSlice,);
|
||||
fn unwrap_head_value_list_1(&mut self, arg0: &ValueList) -> (Value, ValueSlice,);
|
||||
fn unwrap_head_value_list_2(&mut self, arg0: &ValueList) -> (Value, Value, ValueSlice,);
|
||||
fn pack_value_array_2(&mut self, arg0: Value, arg1: Value) -> (ValueArray2,);
|
||||
fn unpack_value_array_2(&mut self, arg0: &ValueArray2) -> (Value, Value,);
|
||||
fn pack_value_array_3(&mut self, arg0: Value, arg1: Value, arg2: Value) -> (ValueArray3,);
|
||||
fn unpack_value_array_3(&mut self, arg0: &ValueArray3) -> (Value, Value, Value,);
|
||||
}
|
||||
@@ -11,9 +11,9 @@ use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::{
|
||||
ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
|
||||
InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
|
||||
ValueLabelAssignments, ValueLabelStart,
|
||||
ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function,
|
||||
GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
|
||||
ValueDef, ValueLabelAssignments, ValueLabelStart,
|
||||
};
|
||||
use crate::machinst::{
|
||||
writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode,
|
||||
@@ -61,6 +61,8 @@ pub trait LowerCtx {
|
||||
/// The instruction type for which this lowering framework is instantiated.
|
||||
type I: VCodeInst;
|
||||
|
||||
fn dfg(&self) -> &DataFlowGraph;
|
||||
|
||||
// Function-level queries:
|
||||
|
||||
/// Get the `ABICallee`.
|
||||
@@ -124,8 +126,12 @@ pub trait LowerCtx {
|
||||
/// instruction's result(s) must have *no* uses remaining, because it will
|
||||
/// not be codegen'd (it has been integrated into the current instruction).
|
||||
fn get_input_as_source_or_const(&self, ir_inst: Inst, idx: usize) -> NonRegInput;
|
||||
/// Like `get_input_as_source_or_const` but with a `Value`.
|
||||
fn get_value_as_source_or_const(&self, value: Value) -> NonRegInput;
|
||||
/// Put the `idx`th input into register(s) and return the assigned register.
|
||||
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg>;
|
||||
/// Put the given value into register(s) and return the assigned register.
|
||||
fn put_value_in_regs(&mut self, value: Value) -> ValueRegs<Reg>;
|
||||
/// Get the `idx`th output register(s) of the given IR instruction. When
|
||||
/// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
|
||||
/// the backend will write results to these output register(s). This
|
||||
@@ -1002,101 +1008,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
|
||||
Ok((vcode, stack_map_info))
|
||||
}
|
||||
|
||||
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
|
||||
log::trace!("put_value_in_reg: val {}", val);
|
||||
let mut regs = self.value_regs[val];
|
||||
log::trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
self.value_lowered_uses[val] += 1;
|
||||
|
||||
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
|
||||
// directly when we encounter a GetPinnedReg op, rather than lowering
|
||||
// the actual op, and do not return the source inst to the caller; the
|
||||
// value comes "out of the ether" and we will not force generation of
|
||||
// the superfluous move.
|
||||
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
|
||||
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
|
||||
if let Some(pr) = self.pinned_reg {
|
||||
regs = ValueRegs::one(pr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the actual inputs for a value. This is the implementation for
|
||||
/// `get_input()` but starting from the SSA value, which is not exposed to
|
||||
/// the backend.
|
||||
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
|
||||
log::trace!(
|
||||
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
|
||||
val,
|
||||
self.cur_inst,
|
||||
self.cur_scan_entry_color,
|
||||
);
|
||||
let inst = match self.f.dfg.value_def(val) {
|
||||
// OK to merge source instruction if (i) we have a source
|
||||
// instruction, and:
|
||||
// - It has no side-effects, OR
|
||||
// - It has a side-effect, has one output value, that one output has
|
||||
// only one use (this one), and the instruction's color is *one less
|
||||
// than* the current scan color.
|
||||
//
|
||||
// This latter set of conditions is testing whether a
|
||||
// side-effecting instruction can sink to the current scan
|
||||
// location; this is possible if the in-color of this inst is
|
||||
// equal to the out-color of the producing inst, so no other
|
||||
// side-effecting ops occur between them (which will only be true
|
||||
// if they are in the same BB, because color increments at each BB
|
||||
// start).
|
||||
//
|
||||
// If it is actually sunk, then in `merge_inst()`, we update the
|
||||
// scan color so that as we scan over the range past which the
|
||||
// instruction was sunk, we allow other instructions (that came
|
||||
// prior to the sunk instruction) to sink.
|
||||
ValueDef::Result(src_inst, result_idx) => {
|
||||
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
|
||||
log::trace!(" -> src inst {}", src_inst);
|
||||
log::trace!(" -> has lowering side effect: {}", src_side_effect);
|
||||
if !src_side_effect {
|
||||
// Pure instruction: always possible to sink.
|
||||
Some((src_inst, result_idx))
|
||||
} else {
|
||||
// Side-effect: test whether this is the only use of the
|
||||
// only result of the instruction, and whether colors allow
|
||||
// the code-motion.
|
||||
if self.cur_scan_entry_color.is_some()
|
||||
&& self.value_uses[val] == 1
|
||||
&& self.value_lowered_uses[val] == 0
|
||||
&& self.num_outputs(src_inst) == 1
|
||||
&& self
|
||||
.side_effect_inst_entry_colors
|
||||
.get(&src_inst)
|
||||
.unwrap()
|
||||
.get()
|
||||
+ 1
|
||||
== self.cur_scan_entry_color.unwrap().get()
|
||||
{
|
||||
Some((src_inst, 0))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
|
||||
|
||||
NonRegInput { inst, constant }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
||||
type I = I;
|
||||
|
||||
fn dfg(&self) -> &DataFlowGraph {
|
||||
&self.f.dfg
|
||||
}
|
||||
|
||||
fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
|
||||
self.vcode.abi()
|
||||
}
|
||||
@@ -1207,12 +1127,99 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
||||
self.get_value_as_source_or_const(val)
|
||||
}
|
||||
|
||||
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
|
||||
log::trace!(
|
||||
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
|
||||
val,
|
||||
self.cur_inst,
|
||||
self.cur_scan_entry_color,
|
||||
);
|
||||
let inst = match self.f.dfg.value_def(val) {
|
||||
// OK to merge source instruction if (i) we have a source
|
||||
// instruction, and:
|
||||
// - It has no side-effects, OR
|
||||
// - It has a side-effect, has one output value, that one output has
|
||||
// only one use (this one), and the instruction's color is *one less
|
||||
// than* the current scan color.
|
||||
//
|
||||
// This latter set of conditions is testing whether a
|
||||
// side-effecting instruction can sink to the current scan
|
||||
// location; this is possible if the in-color of this inst is
|
||||
// equal to the out-color of the producing inst, so no other
|
||||
// side-effecting ops occur between them (which will only be true
|
||||
// if they are in the same BB, because color increments at each BB
|
||||
// start).
|
||||
//
|
||||
// If it is actually sunk, then in `merge_inst()`, we update the
|
||||
// scan color so that as we scan over the range past which the
|
||||
// instruction was sunk, we allow other instructions (that came
|
||||
// prior to the sunk instruction) to sink.
|
||||
ValueDef::Result(src_inst, result_idx) => {
|
||||
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
|
||||
log::trace!(" -> src inst {}", src_inst);
|
||||
log::trace!(" -> has lowering side effect: {}", src_side_effect);
|
||||
if !src_side_effect {
|
||||
// Pure instruction: always possible to sink.
|
||||
Some((src_inst, result_idx))
|
||||
} else {
|
||||
// Side-effect: test whether this is the only use of the
|
||||
// only result of the instruction, and whether colors allow
|
||||
// the code-motion.
|
||||
if self.cur_scan_entry_color.is_some()
|
||||
&& self.value_uses[val] == 1
|
||||
&& self.value_lowered_uses[val] == 0
|
||||
&& self.num_outputs(src_inst) == 1
|
||||
&& self
|
||||
.side_effect_inst_entry_colors
|
||||
.get(&src_inst)
|
||||
.unwrap()
|
||||
.get()
|
||||
+ 1
|
||||
== self.cur_scan_entry_color.unwrap().get()
|
||||
{
|
||||
Some((src_inst, 0))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
|
||||
|
||||
NonRegInput { inst, constant }
|
||||
}
|
||||
|
||||
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
|
||||
let val = self.f.dfg.inst_args(ir_inst)[idx];
|
||||
let val = self.f.dfg.resolve_aliases(val);
|
||||
self.put_value_in_regs(val)
|
||||
}
|
||||
|
||||
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
|
||||
let val = self.f.dfg.resolve_aliases(val);
|
||||
log::trace!("put_value_in_reg: val {}", val);
|
||||
let mut regs = self.value_regs[val];
|
||||
log::trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
self.value_lowered_uses[val] += 1;
|
||||
|
||||
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
|
||||
// directly when we encounter a GetPinnedReg op, rather than lowering
|
||||
// the actual op, and do not return the source inst to the caller; the
|
||||
// value comes "out of the ether" and we will not force generation of
|
||||
// the superfluous move.
|
||||
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
|
||||
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
|
||||
if let Some(pr) = self.pinned_reg {
|
||||
regs = ValueRegs::one(pr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
fn get_output(&self, ir_inst: Inst, idx: usize) -> ValueRegs<Writable<Reg>> {
|
||||
let val = self.f.dfg.inst_results(ir_inst)[idx];
|
||||
writable_value_regs(self.value_regs[val])
|
||||
|
||||
202
cranelift/codegen/src/prelude.isle
Normal file
202
cranelift/codegen/src/prelude.isle
Normal file
@@ -0,0 +1,202 @@
|
||||
;; This is a prelude of standard definitions for ISLE, the instruction-selector
|
||||
;; DSL, as we use it bound to our interfaces.
|
||||
|
||||
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `()`
|
||||
(type Unit (primitive Unit))
|
||||
|
||||
;; `bool` is declared in `clif.isle`.
|
||||
(extern const $true bool)
|
||||
(extern const $false bool)
|
||||
|
||||
(type u8 (primitive u8))
|
||||
(type u16 (primitive u16))
|
||||
(type u32 (primitive u32))
|
||||
(type u64 (primitive u64))
|
||||
(type u128 (primitive u128))
|
||||
(type usize (primitive usize))
|
||||
|
||||
(type i8 (primitive i8))
|
||||
(type i16 (primitive i16))
|
||||
(type i32 (primitive i32))
|
||||
(type i64 (primitive i64))
|
||||
(type i128 (primitive i128))
|
||||
(type isize (primitive isize))
|
||||
|
||||
;; `cranelift-entity`-based identifiers.
|
||||
(type Inst (primitive Inst))
|
||||
(type Type (primitive Type))
|
||||
(type Value (primitive Value))
|
||||
|
||||
;; ISLE representation of `&[Value]`.
|
||||
(type ValueSlice (primitive ValueSlice))
|
||||
|
||||
(type ValueList (primitive ValueList))
|
||||
(type ValueRegs (primitive ValueRegs))
|
||||
|
||||
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type Reg (primitive Reg))
|
||||
(type WritableReg (primitive WritableReg))
|
||||
|
||||
;; Construct a `ValueRegs` of one register.
|
||||
(decl value_reg (Reg) ValueRegs)
|
||||
(extern constructor value_reg value_reg)
|
||||
|
||||
;; Construct a `ValueRegs` of two registers.
|
||||
(decl value_regs (Reg Reg) ValueRegs)
|
||||
(extern constructor value_regs value_regs)
|
||||
|
||||
;; Get a temporary register for writing.
|
||||
(decl temp_writable_reg (Type) WritableReg)
|
||||
(extern constructor temp_writable_reg temp_writable_reg)
|
||||
|
||||
;; Get a temporary register for reading.
|
||||
(decl temp_reg (Type) Reg)
|
||||
(rule (temp_reg ty)
|
||||
(writable_reg_to_reg (temp_writable_reg ty)))
|
||||
|
||||
;; Get the invalid register.
|
||||
(decl invalid_reg () Reg)
|
||||
(extern constructor invalid_reg invalid_reg)
|
||||
|
||||
;; Put the given value into a register.
|
||||
;;
|
||||
;; Asserts that the value fits into a single register, and doesn't require
|
||||
;; multiple registers for its representation (like `i128` on x64 for example).
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_reg (Value) Reg)
|
||||
(extern constructor put_in_reg put_in_reg)
|
||||
|
||||
;; Put the given value into one or more registers.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl put_in_regs (Value) ValueRegs)
|
||||
(extern constructor put_in_regs put_in_regs)
|
||||
|
||||
;; Get the `n`th register inside a `ValueRegs`.
|
||||
(decl value_regs_get (ValueRegs usize) Reg)
|
||||
(extern constructor value_regs_get value_regs_get)
|
||||
|
||||
;; Put the value into one or more registers and return the first register.
|
||||
;;
|
||||
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
|
||||
;; register. This is useful for things like a `i128` shift amount, where we mask
|
||||
;; the shift amount to the bit width of the value being shifted, and so the high
|
||||
;; half of the `i128` won't ever be used.
|
||||
;;
|
||||
;; As a side effect, this marks the value as used.
|
||||
(decl lo_reg (Value) Reg)
|
||||
(rule (lo_reg val)
|
||||
(let ((regs ValueRegs (put_in_regs val)))
|
||||
(value_regs_get regs 0)))
|
||||
|
||||
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl u8_as_u64 (u8) u64)
|
||||
(extern constructor u8_as_u64 u8_as_u64)
|
||||
|
||||
(decl u16_as_u64 (u16) u64)
|
||||
(extern constructor u16_as_u64 u16_as_u64)
|
||||
|
||||
(decl u32_as_u64 (u32) u64)
|
||||
(extern constructor u32_as_u64 u32_as_u64)
|
||||
|
||||
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(extern const $B1 Type)
|
||||
(extern const $B8 Type)
|
||||
(extern const $B16 Type)
|
||||
(extern const $B32 Type)
|
||||
(extern const $B64 Type)
|
||||
(extern const $B128 Type)
|
||||
|
||||
(extern const $I8 Type)
|
||||
(extern const $I16 Type)
|
||||
(extern const $I32 Type)
|
||||
(extern const $I64 Type)
|
||||
(extern const $I128 Type)
|
||||
|
||||
(extern const $B8X16 Type)
|
||||
(extern const $B16X8 Type)
|
||||
(extern const $B32X4 Type)
|
||||
(extern const $B64X2 Type)
|
||||
|
||||
(extern const $I8X16 Type)
|
||||
(extern const $I16X8 Type)
|
||||
(extern const $I32X4 Type)
|
||||
(extern const $I64X2 Type)
|
||||
|
||||
(extern const $F32X4 Type)
|
||||
(extern const $F64X2 Type)
|
||||
|
||||
;; Get the bit width of a given type.
|
||||
(decl ty_bits (Type) u16)
|
||||
(extern constructor ty_bits ty_bits)
|
||||
|
||||
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; An extractor that only matches types that can fit in 64 bits.
|
||||
(decl fits_in_64 (Type) Type)
|
||||
(extern extractor fits_in_64 fits_in_64)
|
||||
|
||||
;; Extractor to get a `ValueSlice` out of a `ValueList`.
|
||||
(decl value_list_slice (ValueSlice) ValueList)
|
||||
(extern extractor infallible value_list_slice value_list_slice)
|
||||
|
||||
;; Extractor to get the first element from a value list, along with its tail as
|
||||
;; a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
|
||||
(extern extractor infallible unwrap_head_value_list_1 unwrap_head_value_list_1)
|
||||
|
||||
;; Extractor to get the first two elements from a value list, along with its
|
||||
;; tail as a `ValueSlice`.
|
||||
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
|
||||
(extern extractor infallible unwrap_head_value_list_2 unwrap_head_value_list_2)
|
||||
|
||||
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
|
||||
(decl writable_reg_to_reg (WritableReg) Reg)
|
||||
(extern constructor writable_reg_to_reg writable_reg_to_reg)
|
||||
|
||||
;; Extract a `u64` from an `Imm64`.
|
||||
(decl u64_from_imm64 (u64) Imm64)
|
||||
(extern extractor infallible u64_from_imm64 u64_from_imm64)
|
||||
|
||||
;; Extract the result values for the given instruction.
|
||||
(decl inst_results (ValueSlice) Inst)
|
||||
(extern extractor infallible inst_results inst_results)
|
||||
|
||||
;; Extract the first result value of the given instruction.
|
||||
(decl first_result (Value) Inst)
|
||||
(extern extractor first_result first_result)
|
||||
|
||||
;; Extract the `InstructionData` for an `Inst`.
|
||||
(decl inst_data (InstructionData) Inst)
|
||||
(extern extractor infallible inst_data inst_data)
|
||||
|
||||
;; Extract the type of a `Value`.
|
||||
(decl value_type (Type) Value)
|
||||
(extern extractor infallible value_type value_type)
|
||||
|
||||
;; Extract the type of the instruction's first result.
|
||||
(decl result_type (Type) Inst)
|
||||
(extractor (result_type ty)
|
||||
(first_result (value_type ty)))
|
||||
|
||||
;; Extract the type of the instruction's first result and pass along the
|
||||
;; instruction as well.
|
||||
(decl has_type (Type Inst) Inst)
|
||||
(extractor (has_type ty inst)
|
||||
(and (result_type ty)
|
||||
inst))
|
||||
|
||||
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
|
||||
;; type. Will only match when there is more than one lane.
|
||||
(decl multi_lane (u8 u16) Type)
|
||||
(extern extractor multi_lane multi_lane)
|
||||
|
||||
;; Match the instruction that defines the given value, if any.
|
||||
(decl def_inst (Inst) Value)
|
||||
(extern extractor def_inst def_inst)
|
||||
@@ -62,7 +62,7 @@ use serde::{Deserialize, Serialize};
|
||||
///
|
||||
/// The index stored in an `EntityList` points to part 2, the list elements. The value 0 is
|
||||
/// reserved for the empty list which isn't allocated in the vector.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct EntityList<T: EntityRef + ReservedValue> {
|
||||
index: u32,
|
||||
@@ -271,7 +271,7 @@ impl<T: EntityRef + ReservedValue> EntityList<T> {
|
||||
}
|
||||
|
||||
/// Get the list as a slice.
|
||||
pub fn as_slice<'a>(&'a self, pool: &'a ListPool<T>) -> &'a [T] {
|
||||
pub fn as_slice<'a>(&self, pool: &'a ListPool<T>) -> &'a [T] {
|
||||
let idx = self.index as usize;
|
||||
match pool.len_of(self) {
|
||||
None => &[],
|
||||
|
||||
@@ -700,31 +700,30 @@ block2(v6: i128):
|
||||
v8 = iadd.i128 v6, v7
|
||||
return v8
|
||||
|
||||
; check: pushq %rbp
|
||||
; check: Block 0:
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: xorq %rdi, %rdi
|
||||
; nextln: xorq %rsi, %rsi
|
||||
; nextln: testb $$1, %dl
|
||||
; nextln: jnz label1; j label2
|
||||
; check: Block 1:
|
||||
; check: movl $$0, %esi
|
||||
; nextln: movl $$0, %edi
|
||||
; nextln: movl $$1, %eax
|
||||
; nextln: movl $$0, %ecx
|
||||
; nextln: addq %rax, %rsi
|
||||
; nextln: adcq %rcx, %rdi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdi, %rdx
|
||||
; check: movl $$1, %ecx
|
||||
; nextln: xorq %rax, %rax
|
||||
; nextln: addq %rcx, %rdi
|
||||
; nextln: adcq %rax, %rsi
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
; check: Block 2:
|
||||
; check: movl $$0, %esi
|
||||
; nextln: movl $$0, %edi
|
||||
; nextln: movl $$2, %eax
|
||||
; nextln: movl $$0, %ecx
|
||||
; nextln: addq %rax, %rsi
|
||||
; nextln: adcq %rcx, %rdi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdi, %rdx
|
||||
; check: movl $$2, %ecx
|
||||
; nextln: xorq %rax, %rax
|
||||
; nextln: addq %rcx, %rdi
|
||||
; nextln: adcq %rax, %rsi
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
@@ -744,34 +743,32 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$32, %rsp
|
||||
; nextln: subq $$16, %rsp
|
||||
; nextln: movq %r12, 0(%rsp)
|
||||
; nextln: movq %r13, 8(%rsp)
|
||||
; nextln: movq %r14, 16(%rsp)
|
||||
; nextln: movq %r8, %r14
|
||||
; nextln: movq 16(%rbp), %r10
|
||||
; nextln: movq %r9, %r11
|
||||
; nextln: movq 16(%rbp), %r13
|
||||
; nextln: movq 24(%rbp), %r12
|
||||
; nextln: movq 32(%rbp), %r11
|
||||
; nextln: movq 40(%rbp), %rax
|
||||
; nextln: movq 48(%rbp), %r13
|
||||
; nextln: movq %rsi, %r8
|
||||
; nextln: movq 32(%rbp), %r10
|
||||
; nextln: movq 40(%rbp), %r9
|
||||
; nextln: movq 48(%rbp), %rax
|
||||
; nextln: addq %rdx, %rdi
|
||||
; nextln: adcq %rcx, %r8
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: adcq %rcx, %rdx
|
||||
; nextln: xorq %rsi, %rsi
|
||||
; nextln: addq %r14, %r9
|
||||
; nextln: adcq %rsi, %r10
|
||||
; nextln: addq %rax, %r12
|
||||
; nextln: adcq %r13, %r11
|
||||
; nextln: addq %r9, %rdi
|
||||
; nextln: adcq %r10, %r8
|
||||
; nextln: addq %r8, %r11
|
||||
; nextln: adcq %rsi, %r13
|
||||
; nextln: addq %r9, %r12
|
||||
; nextln: adcq %rax, %r10
|
||||
; nextln: addq %r11, %rdi
|
||||
; nextln: adcq %r13, %rdx
|
||||
; nextln: addq %rdi, %r12
|
||||
; nextln: adcq %r8, %r11
|
||||
; nextln: adcq %rdx, %r10
|
||||
; nextln: movq %r12, %rax
|
||||
; nextln: movq %r11, %rdx
|
||||
; nextln: movq %r10, %rdx
|
||||
; nextln: movq 0(%rsp), %r12
|
||||
; nextln: movq 8(%rsp), %r13
|
||||
; nextln: movq 16(%rsp), %r14
|
||||
; nextln: addq $$32, %rsp
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
@@ -907,26 +904,25 @@ block0(v0: i128, v1: i128):
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rsi, %rdi
|
||||
; nextln: movq %rax, %rsi
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shlq %cl, %rsi
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shlq %cl, %rax
|
||||
; nextln: shlq %cl, %rdi
|
||||
; nextln: movl $$64, %ecx
|
||||
; nextln: subq %rdx, %rcx
|
||||
; nextln: shrq %cl, %rdi
|
||||
; nextln: shrq %cl, %rax
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: testq $$127, %rdx
|
||||
; nextln: cmovzq %rcx, %rdi
|
||||
; nextln: orq %rax, %rdi
|
||||
; nextln: xorq %rax, %rax
|
||||
; nextln: andq $$64, %rdx
|
||||
; nextln: cmovzq %rdi, %rax
|
||||
; nextln: cmovzq %rcx, %rax
|
||||
; nextln: orq %rdi, %rax
|
||||
; nextln: testq $$64, %rdx
|
||||
; nextln: cmovzq %rsi, %rcx
|
||||
; nextln: cmovnzq %rsi, %rax
|
||||
; nextln: movq %rax, %rdx
|
||||
; nextln: cmovzq %rax, %rsi
|
||||
; nextln: movq %rcx, %rax
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
@@ -939,28 +935,26 @@ block0(v0: i128, v1: i128):
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rsi, %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shrq %cl, %rdi
|
||||
; nextln: movq %rax, %rsi
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shrq %cl, %rsi
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shrq %cl, %rax
|
||||
; nextln: movl $$64, %ecx
|
||||
; nextln: subq %rdx, %rcx
|
||||
; nextln: shlq %cl, %rdi
|
||||
; nextln: shlq %cl, %rax
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: testq $$127, %rdx
|
||||
; nextln: cmovzq %rcx, %rdi
|
||||
; nextln: orq %rax, %rdi
|
||||
; nextln: xorq %rax, %rax
|
||||
; nextln: cmovzq %rcx, %rax
|
||||
; nextln: orq %rdi, %rax
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: andq $$64, %rdx
|
||||
; nextln: cmovzq %rsi, %rax
|
||||
; nextln: cmovzq %rdi, %rcx
|
||||
; nextln: cmovnzq %rsi, %rcx
|
||||
; nextln: movq %rax, %rdx
|
||||
; nextln: movq %rcx, %rax
|
||||
; nextln: testq $$64, %rdx
|
||||
; nextln: movq %rsi, %rdi
|
||||
; nextln: cmovzq %rax, %rdi
|
||||
; nextln: cmovzq %rsi, %rcx
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rcx, %rdx
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
@@ -1006,53 +1000,51 @@ block0(v0: i128, v1: i128):
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %rdi, %r8
|
||||
; nextln: movq %r8, %r9
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shlq %cl, %r9
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdi, %rax
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shlq %cl, %rax
|
||||
; nextln: movq %rsi, %r8
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: shlq %cl, %r8
|
||||
; nextln: movl $$64, %ecx
|
||||
; nextln: subq %rdx, %rcx
|
||||
; nextln: movq %r8, %r10
|
||||
; nextln: shrq %cl, %r10
|
||||
; nextln: xorq %rdi, %rdi
|
||||
; nextln: movq %rdi, %r9
|
||||
; nextln: shrq %cl, %r9
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: testq $$127, %rdx
|
||||
; nextln: cmovzq %rdi, %r10
|
||||
; nextln: orq %rax, %r10
|
||||
; nextln: xorq %rax, %rax
|
||||
; nextln: movq %rdx, %rcx
|
||||
; nextln: andq $$64, %rcx
|
||||
; nextln: cmovzq %r10, %rax
|
||||
; nextln: cmovzq %r9, %rdi
|
||||
; nextln: cmovnzq %r9, %rax
|
||||
; nextln: cmovzq %rcx, %r9
|
||||
; nextln: orq %r8, %r9
|
||||
; nextln: testq $$64, %rdx
|
||||
; nextln: movq %rcx, %r8
|
||||
; nextln: cmovzq %rax, %r8
|
||||
; nextln: cmovzq %r9, %rax
|
||||
; nextln: movl $$128, %r9d
|
||||
; nextln: subq %rdx, %r9
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: movq %rdi, %rdx
|
||||
; nextln: movq %r9, %rcx
|
||||
; nextln: shrq %cl, %rdx
|
||||
; nextln: movq %rsi, %rdi
|
||||
; nextln: movq %r9, %rcx
|
||||
; nextln: shrq %cl, %r8
|
||||
; nextln: shrq %cl, %rdi
|
||||
; nextln: movl $$64, %ecx
|
||||
; nextln: subq %r9, %rcx
|
||||
; nextln: shlq %cl, %rsi
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: testq $$127, %r9
|
||||
; nextln: cmovzq %rcx, %rsi
|
||||
; nextln: orq %r8, %rsi
|
||||
; nextln: xorq %rcx, %rcx
|
||||
; nextln: xorq %r8, %r8
|
||||
; nextln: andq $$64, %r9
|
||||
; nextln: cmovzq %rdx, %rcx
|
||||
; nextln: cmovzq %rsi, %r8
|
||||
; nextln: cmovnzq %rdx, %r8
|
||||
; nextln: orq %rdi, %r8
|
||||
; nextln: orq %rax, %rcx
|
||||
; nextln: orq %rdx, %rsi
|
||||
; nextln: xorq %rdx, %rdx
|
||||
; nextln: testq $$64, %r9
|
||||
; nextln: movq %rdi, %rcx
|
||||
; nextln: cmovzq %rsi, %rcx
|
||||
; nextln: movq %rdx, %rsi
|
||||
; nextln: cmovzq %rdi, %rsi
|
||||
; nextln: orq %rcx, %r8
|
||||
; nextln: orq %rsi, %rax
|
||||
; nextln: movq %rax, %rdx
|
||||
; nextln: movq %r8, %rax
|
||||
; nextln: movq %rcx, %rdx
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
4
cranelift/isle/fuzz/README.md
Normal file
4
cranelift/isle/fuzz/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# ISLE Fuzz Targets
|
||||
|
||||
These are separate from the top-level `wasmtime/fuzz` fuzz targets because we
|
||||
don't intend to run them on OSS-Fuzz. They are just for local ISLE hacking.
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
authors = ["Chris Fallin <chris@cfallin.org>", "Nick Fitzgerald <fitzgen@gmail.com>"]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
description = "ISLE: Instruction Selection and Lowering Expressions. A domain-specific language for instruction selection in Cranelift."
|
||||
edition = "2018"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
[package]
|
||||
name = "islec"
|
||||
version = "0.1.0"
|
||||
authors = ["Chris Fallin <chris@cfallin.org>"]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
edition = "2018"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
log = "0.4"
|
||||
|
||||
@@ -35,4 +35,5 @@ skip = [
|
||||
{ name = "wast" }, # old one pulled in by witx
|
||||
{ name = "itertools" }, # 0.9 pulled in by criterion-plot
|
||||
{ name = "quick-error" }, # transitive dependencies
|
||||
{ name = "textwrap" }, # `miette` and `clap` depend on different versions
|
||||
]
|
||||
|
||||
@@ -26,6 +26,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[
|
||||
"peepmatic",
|
||||
"peepmatic-souper",
|
||||
// cranelift
|
||||
"isle",
|
||||
"cranelift-entity",
|
||||
"wasmtime-types",
|
||||
"cranelift-bforest",
|
||||
|
||||
Reference in New Issue
Block a user