From c266f7f4c3141436571eee0a70421756a253a1ee Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Mon, 31 May 2021 12:27:29 -0700 Subject: [PATCH 1/3] Cranelift: Add `LibCall::Memcmp` The comment says the enum is "likely to grow" and the function's been in libc since C89, so hopefully this is ok. I'd like to use it for emitting things like array equality. --- cranelift/codegen/src/ir/libcall.rs | 11 +++ cranelift/frontend/src/frontend.rs | 100 ++++++++++++++++++++++++++++ cranelift/module/src/lib.rs | 1 + 3 files changed, 112 insertions(+) diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs index e8298d8ee7..5dbbd7232d 100644 --- a/cranelift/codegen/src/ir/libcall.rs +++ b/cranelift/codegen/src/ir/libcall.rs @@ -56,6 +56,8 @@ pub enum LibCall { Memset, /// libc.memmove Memmove, + /// libc.memcmp + Memcmp, /// Elf __tls_get_addr ElfTlsGetAddr, @@ -92,6 +94,7 @@ impl FromStr for LibCall { "Memcpy" => Ok(Self::Memcpy), "Memset" => Ok(Self::Memset), "Memmove" => Ok(Self::Memmove), + "Memcmp" => Ok(Self::Memcmp), "ElfTlsGetAddr" => Ok(Self::ElfTlsGetAddr), _ => Err(()), @@ -157,6 +160,7 @@ impl LibCall { Memcpy, Memset, Memmove, + Memcmp, ElfTlsGetAddr, ] } @@ -201,4 +205,11 @@ mod tests { fn parsing() { assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32)); } + + #[test] + fn all_libcalls_to_from_string() { + for &libcall in LibCall::all_libcalls() { + assert_eq!(libcall.to_string().parse(), Ok(libcall)); + } + } } diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 7e72608b51..957c0c8a0d 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -804,6 +804,42 @@ impl<'a> FunctionBuilder<'a> { self.ins().call(libc_memmove, &[dest, source, size]); } + + /// Calls libc.memcmp + /// + /// Compares `size` bytes from memory starting at `left` to memory starting + /// at `right`. Returns `0` if all `size` bytes are equal. 
If the first difference + /// is at offset `i`, returns a positive integer if `ugt(left[i], right[i])` + /// and a negative integer if `ult(left[i], right[i])`. + /// + /// Returns a C `int`, which is currently always [`types::I32`]. + pub fn call_memcmp( + &mut self, + config: TargetFrontendConfig, + left: Value, + right: Value, + size: Value, + ) -> Value { + let pointer_type = config.pointer_type(); + let signature = { + let mut s = Signature::new(config.default_call_conv); + s.params.reserve(3); + s.params.push(AbiParam::new(pointer_type)); + s.params.push(AbiParam::new(pointer_type)); + s.params.push(AbiParam::new(pointer_type)); + s.returns.push(AbiParam::new(types::I32)); + self.import_signature(s) + }; + + let libc_memcmp = self.import_function(ExtFuncData { + name: ExternalName::LibCall(LibCall::Memcmp), + signature, + colocated: false, + }); + + let call = self.ins().call(libc_memcmp, &[left, right, size]); + self.func.dfg.first_result(call) + } } fn greatest_divisible_power_of_two(size: u64) -> u64 { @@ -1212,6 +1248,70 @@ block0: ); } + #[test] + fn memcmp() { + use core::str::FromStr; + use cranelift_codegen::{isa, settings}; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + let triple = + ::target_lexicon::Triple::from_str("x86_64").expect("Couldn't create x86_64 triple"); + + let target = isa::lookup(triple) + .ok() + .map(|b| b.finish(shared_flags)) + .expect("This test requires x86_64 support."); + + let mut sig = Signature::new(target.default_call_conv()); + sig.returns.push(AbiParam::new(I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig); + { + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let block0 = builder.create_block(); + let x = Variable::new(0); + let y = Variable::new(1); + let z = Variable::new(2); + builder.declare_var(x, target.pointer_type()); + 
builder.declare_var(y, target.pointer_type()); + builder.declare_var(z, target.pointer_type()); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + + let left = builder.use_var(x); + let right = builder.use_var(y); + let size = builder.use_var(z); + let cmp = builder.call_memcmp(target.frontend_config(), left, right, size); + builder.ins().return_(&[cmp]); + + builder.seal_all_blocks(); + builder.finalize(); + } + + assert_eq!( + func.display(None).to_string(), + "function %sample() -> i32 system_v { + sig0 = (i64, i64, i64) -> i32 system_v + fn0 = %Memcmp sig0 + +block0: + v6 = iconst.i64 0 + v2 -> v6 + v5 = iconst.i64 0 + v1 -> v5 + v4 = iconst.i64 0 + v0 -> v4 + v3 = call fn0(v0, v1, v2) + return v3 +} +" + ); + } + #[test] fn undef_vector_vars() { let mut sig = Signature::new(CallConv::SystemV); diff --git a/cranelift/module/src/lib.rs b/cranelift/module/src/lib.rs index 857aa87382..21d1c9df27 100644 --- a/cranelift/module/src/lib.rs +++ b/cranelift/module/src/lib.rs @@ -73,6 +73,7 @@ pub fn default_libcall_names() -> Box String + Send + Syn ir::LibCall::Memcpy => "memcpy".to_owned(), ir::LibCall::Memset => "memset".to_owned(), ir::LibCall::Memmove => "memmove".to_owned(), + ir::LibCall::Memcmp => "memcmp".to_owned(), ir::LibCall::ElfTlsGetAddr => "__tls_get_addr".to_owned(), }) From d55a19365e0ebcee3c7e20f9596c2667574bd856 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Tue, 1 Jun 2021 08:41:56 -0700 Subject: [PATCH 2/3] Add FunctionBuilder::emit_small_memory_compare This takes an IntCC for the comparison to do, though panics for Signed* since memcmp is an unsigned comparison. Currently it's most useful for (Not)Equal, but once big-endian loads are implemented it'll be able to support the other Unsigned* comparisons nicely on more than just bytes. 
--- cranelift/frontend/src/frontend.rs | 297 ++++++++++++++++++++++++++++- 1 file changed, 293 insertions(+), 4 deletions(-) diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 957c0c8a0d..eb87cb0e66 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -4,6 +4,7 @@ use crate::variable::Variable; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::entity::{EntitySet, SecondaryMap}; use cranelift_codegen::ir; +use cranelift_codegen::ir::condcodes::IntCC; use cranelift_codegen::ir::{ types, AbiParam, Block, DataFlowGraph, ExtFuncData, ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase, @@ -840,6 +841,94 @@ impl<'a> FunctionBuilder<'a> { let call = self.ins().call(libc_memcmp, &[left, right, size]); self.func.dfg.first_result(call) } + + /// Optimised [`Self::call_memcmp`] for small comparisons. + /// + /// This implements the byte slice comparison `int_cc(left[..size], right[..size])`. + /// + /// `left_align` and `right_align` are the statically-known alignments of the + /// `left` and `right` pointers respectively. These are used to know whether + /// to mark `load`s as aligned. It's always fine to pass `1` for these, but + /// passing something higher than the true alignment may trap or otherwise + /// misbehave as described in [`MemFlags::aligned`]. + /// + /// Note that `memcmp` is a *big-endian* and *unsigned* comparison. + /// As such, this panics when called with `IntCC::Signed*` or `IntCC::*Overflow`. 
+ pub fn emit_small_memory_compare( + &mut self, + config: TargetFrontendConfig, + int_cc: IntCC, + left: Value, + right: Value, + size: u64, + left_align: std::num::NonZeroU8, + right_align: std::num::NonZeroU8, + flags: MemFlags, + ) -> Value { + use IntCC::*; + let (zero_cc, empty_imm) = match int_cc { + // + Equal => (Equal, true), + NotEqual => (NotEqual, false), + + UnsignedLessThan => (SignedLessThan, false), + UnsignedGreaterThanOrEqual => (SignedGreaterThanOrEqual, true), + UnsignedGreaterThan => (SignedGreaterThan, false), + UnsignedLessThanOrEqual => (SignedLessThanOrEqual, true), + + SignedLessThan + | SignedGreaterThanOrEqual + | SignedGreaterThan + | SignedLessThanOrEqual => { + panic!("Signed comparison {} not supported by memcmp", int_cc) + } + Overflow | NotOverflow => { + panic!("Overflow comparison {} not supported by memcmp", int_cc) + } + }; + + if size == 0 { + return self.ins().bconst(types::B1, empty_imm); + } + + // Future work could consider expanding this to handle more-complex scenarios. + fn type_from_byte_size(size: u64) -> Option { + if size > 16 { + return None; + } + Type::int(size as u16 * 8) + } + if let Some(small_type) = type_from_byte_size(size) { + if let Equal | NotEqual = zero_cc { + let mut left_flags = flags; + if size == left_align.get().into() { + left_flags.set_aligned(); + } + let mut right_flags = flags; + if size == right_align.get().into() { + right_flags.set_aligned(); + } + let left_val = self.ins().load(small_type, left_flags, left, 0); + let right_val = self.ins().load(small_type, right_flags, right, 0); + return self.ins().icmp(int_cc, left_val, right_val); + } else if small_type == types::I8 { + // Once the big-endian loads from wasmtime#2492 are implemented in + // the backends, we could easily handle comparisons for more sizes here. + // But for now, just handle single bytes where we don't need to worry. 
+ + let mut aligned_flags = flags; + aligned_flags.set_aligned(); + let left_val = self.ins().load(small_type, aligned_flags, left, 0); + let right_val = self.ins().load(small_type, aligned_flags, right, 0); + return self.ins().icmp(int_cc, left_val, right_val); + } + } + + let pointer_type = config.pointer_type(); + let size = self.ins().iconst(pointer_type, size as i64); + let cmp = self.call_memcmp(config, left, right, size); + self.ins().icmp_imm(zero_cc, cmp, 0) + } } fn greatest_divisible_power_of_two(size: u64) -> u64 { @@ -876,11 +965,12 @@ mod tests { use crate::Variable; use alloc::string::ToString; use cranelift_codegen::entity::EntityRef; + use cranelift_codegen::ir::condcodes::IntCC; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{ - AbiParam, ExternalName, Function, InstBuilder, MemFlags, Signature, + AbiParam, ExternalName, Function, InstBuilder, MemFlags, Signature, Value, }; - use cranelift_codegen::isa::{CallConv, TargetFrontendConfig}; + use cranelift_codegen::isa::{CallConv, TargetFrontendConfig, TargetIsa}; use cranelift_codegen::settings; use cranelift_codegen::verifier::verify_function; use target_lexicon::PointerWidth; @@ -1251,7 +1341,7 @@ block0: #[test] fn memcmp() { use core::str::FromStr; - use cranelift_codegen::{isa, settings}; + use cranelift_codegen::isa; let shared_builder = settings::builder(); let shared_flags = settings::Flags::new(shared_builder); @@ -1293,7 +1383,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { sig0 = (i64, i64, i64) -> i32 system_v fn0 = %Memcmp sig0 @@ -1312,6 +1402,205 @@ block0: ); } + #[test] + fn small_memcmp_zero_size() { + let align_eight = std::num::NonZeroU8::new(8).unwrap(); + small_memcmp_helper( + " +block0: + v4 = iconst.i64 0 + v1 -> v4 + v3 = iconst.i64 0 + v0 -> v3 + v2 = bconst.b1 true + return v2", + |builder, target, x, y| { + builder.emit_small_memory_compare( + target.frontend_config(), + 
IntCC::UnsignedGreaterThanOrEqual, + x, + y, + 0, + align_eight, + align_eight, + MemFlags::new(), + ) + }, + ); + } + + #[test] + fn small_memcmp_byte_ugt() { + let align_one = std::num::NonZeroU8::new(1).unwrap(); + small_memcmp_helper( + " +block0: + v6 = iconst.i64 0 + v1 -> v6 + v5 = iconst.i64 0 + v0 -> v5 + v2 = load.i8 aligned v0 + v3 = load.i8 aligned v1 + v4 = icmp ugt v2, v3 + return v4", + |builder, target, x, y| { + builder.emit_small_memory_compare( + target.frontend_config(), + IntCC::UnsignedGreaterThan, + x, + y, + 1, + align_one, + align_one, + MemFlags::new(), + ) + }, + ); + } + + #[test] + fn small_memcmp_aligned_eq() { + let align_four = std::num::NonZeroU8::new(4).unwrap(); + small_memcmp_helper( + " +block0: + v6 = iconst.i64 0 + v1 -> v6 + v5 = iconst.i64 0 + v0 -> v5 + v2 = load.i32 aligned v0 + v3 = load.i32 aligned v1 + v4 = icmp eq v2, v3 + return v4", + |builder, target, x, y| { + builder.emit_small_memory_compare( + target.frontend_config(), + IntCC::Equal, + x, + y, + 4, + align_four, + align_four, + MemFlags::new(), + ) + }, + ); + } + + #[test] + fn small_memcmp_ipv6_ne() { + let align_two = std::num::NonZeroU8::new(2).unwrap(); + small_memcmp_helper( + " +block0: + v6 = iconst.i64 0 + v1 -> v6 + v5 = iconst.i64 0 + v0 -> v5 + v2 = load.i128 v0 + v3 = load.i128 v1 + v4 = icmp ne v2, v3 + return v4", + |builder, target, x, y| { + builder.emit_small_memory_compare( + target.frontend_config(), + IntCC::NotEqual, + x, + y, + 16, + align_two, + align_two, + MemFlags::new(), + ) + }, + ); + } + + #[test] + fn small_memcmp_odd_size_uge() { + let one = std::num::NonZeroU8::new(1).unwrap(); + small_memcmp_helper( + " + sig0 = (i64, i64, i64) -> i32 system_v + fn0 = %Memcmp sig0 + +block0: + v6 = iconst.i64 0 + v1 -> v6 + v5 = iconst.i64 0 + v0 -> v5 + v2 = iconst.i64 3 + v3 = call fn0(v0, v1, v2) + v4 = icmp_imm sge v3, 0 + return v4", + |builder, target, x, y| { + builder.emit_small_memory_compare( + target.frontend_config(), + 
IntCC::UnsignedGreaterThanOrEqual, + x, + y, + 3, + one, + one, + MemFlags::new(), + ) + }, + ); + } + + fn small_memcmp_helper( + expected: &str, + f: impl FnOnce(&mut FunctionBuilder, &dyn TargetIsa, Value, Value) -> Value, + ) { + use core::str::FromStr; + use cranelift_codegen::isa; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + let triple = + ::target_lexicon::Triple::from_str("x86_64").expect("Couldn't create x86_64 triple"); + + let target = isa::lookup(triple) + .ok() + .map(|b| b.finish(shared_flags)) + .expect("This test requires x86_64 support."); + + let mut sig = Signature::new(target.default_call_conv()); + sig.returns.push(AbiParam::new(B1)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig); + { + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let block0 = builder.create_block(); + let x = Variable::new(0); + let y = Variable::new(1); + builder.declare_var(x, target.pointer_type()); + builder.declare_var(y, target.pointer_type()); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + + let left = builder.use_var(x); + let right = builder.use_var(y); + let ret = f(&mut builder, &*target, left, right); + builder.ins().return_(&[ret]); + + builder.seal_all_blocks(); + builder.finalize(); + } + + let actual_ir = func.display().to_string(); + let expected_ir = format!("function %sample() -> b1 system_v {{{}\n}}\n", expected); + assert!( + expected_ir == actual_ir, + "Expected\n{}, but got\n{}", + expected_ir, + actual_ir + ); + } + #[test] fn undef_vector_vars() { let mut sig = Signature::new(CallConv::SystemV); From ca7c54b5f8f00865a06241030fa6140bd6d1c036 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Mon, 29 Nov 2021 16:48:03 -0800 Subject: [PATCH 3/3] Add `Type::int_with_byte_size` constructor --- cranelift/codegen/src/ir/types.rs | 27 
+++++++++++++++++++++++++++ cranelift/frontend/src/frontend.rs | 9 ++------- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index bb2056926b..59e6a871b9 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -104,6 +104,8 @@ impl Type { } /// Get an integer type with the requested number of bits. + /// + /// For the same thing but in *bytes*, use [`Self::int_with_byte_size`]. pub fn int(bits: u16) -> Option { match bits { 8 => Some(I8), @@ -115,6 +117,13 @@ impl Type { } } + /// Get an integer type with the requested number of bytes. + /// + /// For the same thing but in *bits*, use [`Self::int`]. + pub fn int_with_byte_size(bytes: u16) -> Option { + Self::int(bytes.checked_mul(8)?) + } + /// Get a type with the same number of lanes as `self`, but using `lane` as the lane type. fn replace_lanes(self, lane: Self) -> Self { debug_assert!(lane.is_lane() && !self.is_special()); @@ -595,4 +604,22 @@ mod tests { assert_eq!(B8.as_int(), I8); assert_eq!(B128.as_int(), I128); } + + #[test] + fn int_from_size() { + assert_eq!(Type::int(0), None); + assert_eq!(Type::int(8), Some(I8)); + assert_eq!(Type::int(33), None); + assert_eq!(Type::int(64), Some(I64)); + + assert_eq!(Type::int_with_byte_size(0), None); + assert_eq!(Type::int_with_byte_size(2), Some(I16)); + assert_eq!(Type::int_with_byte_size(6), None); + assert_eq!(Type::int_with_byte_size(16), Some(I128)); + + // Ensure `int_with_byte_size` handles overflow properly + let evil = 0xE001_u16; + assert_eq!(evil.wrapping_mul(8), 8, "check the constant is correct"); + assert_eq!(Type::int_with_byte_size(evil), None); + } } diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index eb87cb0e66..7611e69c0a 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -13,6 +13,7 @@ use cranelift_codegen::ir::{ }; use 
cranelift_codegen::isa::TargetFrontendConfig; use cranelift_codegen::packed_option::PackedOption; +use std::convert::TryInto; // FIXME: Remove in edition2021 /// Structure used for translating a series of functions into Cranelift IR. /// @@ -892,13 +893,7 @@ impl<'a> FunctionBuilder<'a> { } // Future work could consider expanding this to handle more-complex scenarios. - fn type_from_byte_size(size: u64) -> Option { - if size > 16 { - return None; - } - Type::int(size as u16 * 8) - } - if let Some(small_type) = type_from_byte_size(size) { + if let Some(small_type) = size.try_into().ok().and_then(Type::int_with_byte_size) { if let Equal | NotEqual = zero_cc { let mut left_flags = flags; if size == left_align.get().into() {