From 6b86984c418ba09a23942dc7569a89d83f246164 Mon Sep 17 00:00:00 2001
From: Andrew Brown
Date: Wed, 21 Jul 2021 11:19:40 -0700
Subject: [PATCH] x64: avoid load-coalescing SIMD operations with non-aligned
 loads

Fixes #2943, though not as optimally as may be desired. With x64 SIMD
instructions, the memory operand must be aligned--this change adds that
check. There are cases, however, where we can do better--see #3106.
---
 cranelift/codegen/src/ir/instructions.rs | 15 +++++++++++++++
 cranelift/codegen/src/isa/x64/lower.rs   |  6 ++++++
 2 files changed, 21 insertions(+)

diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs
index 94ce878a97..993aa69c06 100644
--- a/cranelift/codegen/src/ir/instructions.rs
+++ b/cranelift/codegen/src/ir/instructions.rs
@@ -24,6 +24,8 @@ use crate::data_value::DataValue;
 use crate::entity;
 use ir::condcodes::{FloatCC, IntCC};
 
+use super::MemFlags;
+
 /// Some instructions use an external list of argument values because there is not enough space in
 /// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
 /// `dfg.value_lists`.
@@ -395,6 +397,19 @@ impl InstructionData {
         }
     }
 
+    /// If this is a load/store instruction, return its memory flags.
+    pub fn memflags(&self) -> Option<MemFlags> {
+        match self {
+            &InstructionData::Load { flags, .. }
+            | &InstructionData::LoadComplex { flags, .. }
+            | &InstructionData::LoadNoOffset { flags, .. }
+            | &InstructionData::Store { flags, .. }
+            | &InstructionData::StoreComplex { flags, .. }
+            | &InstructionData::StoreNoOffset { flags, .. } => Some(flags),
+            _ => None,
+        }
+    }
+
     /// Return information about a call instruction.
     ///
     /// Any instruction that can call another function reveals its call signature here.
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index fcb5e71355..9bdecdfe76 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -153,6 +153,12 @@ fn is_mergeable_load<C: LowerCtx<I = Inst>>(
         return None;
     }
 
+    // SIMD instructions can only be load-coalesced when the loaded value comes
+    // from an aligned address.
+    if load_ty.is_vector() && !insn_data.memflags().map_or(false, |f| f.aligned()) {
+        return None;
+    }
+
     // Just testing the opcode is enough, because the width will always match if
     // the type does (and the type should match if the CLIF is properly
     // constructed).