* wiggle: adapt Wiggle guest slices for `unsafe` shared use When multiple threads can concurrently modify a WebAssembly shared memory, the underlying data for a Wiggle `GuestSlice` and `GuestSliceMut` could change due to access from other threads. This breaks Rust guarantees when `&[T]` and `&mut [T]` slices are handed out. This change modifies `GuestPtr` to make `as_slice` and `as_slice_mut` return an `Option` which is `None` when the underlying WebAssembly memory is shared. But WASI implementations still need access to the underlying WebAssembly memory, both to read from it and write to it. This change adds new APIs: - `GuestPtr::to_vec` copies the bytes from WebAssembly memory (from which we can safely take a `&[T]`) - `GuestPtr::as_unsafe_slice_mut` returns a wrapper `struct` from which we can `unsafe`-ly return a mutable slice (users must accept the unsafety of concurrently modifying a `&mut [T]`) This approach allows us to maintain Wiggle's borrow-checking infrastructure, which enforces the guarantee that Wiggle itself will not modify overlapping regions; this is important because, e.g., the underlying system calls may expect this. Though other threads may still modify the same underlying region, that is impossible to prevent; at least Wiggle will not be able to do so. Finally, the changes to Wiggle's API are propagated to all WASI implementations in Wasmtime. For now, code locations that attempt to get a guest slice will panic if the underlying memory is shared. Note that Wiggle is not enabled for shared memory (that will come later in something like #5054), but when it is, these panics will be clear indicators of locations that must be re-implemented in a thread-safe way.
* review: remove double cast * review: refactor to include more logic in 'UnsafeGuestSlice' * review: add reference to #4203 * review: link all thread-safe WASI fixups to #5235 * fix: consume 'UnsafeGuestSlice' during conversion to safe versions * review: remove 'as_slice' and 'as_slice_mut' * review: use 'as_unsafe_slice_mut' in 'to_vec' * review: add `UnsafeBorrowResult`
159 lines
5.8 KiB
Rust
159 lines
5.8 KiB
Rust
//! Implements the wasi-nn API.
|
|
use crate::api::{Backend, BackendError, BackendExecutionContext, BackendGraph};
|
|
use crate::witx::types::{ExecutionTarget, GraphBuilderArray, Tensor, TensorType};
|
|
use openvino::{InferenceError, Layout, Precision, SetupError, TensorDesc};
|
|
use std::sync::Arc;
|
|
|
|
/// The OpenVINO implementation of the wasi-nn `Backend` trait.
///
/// The inner `Option<openvino::Core>` is `None` until the first model load;
/// `load` constructs it lazily so that a missing OpenVINO installation only
/// affects wasi-nn users (see the comment in `load`).
#[derive(Default)]
pub(crate) struct OpenvinoBackend(Option<openvino::Core>);
|
|
|
|
impl Backend for OpenvinoBackend {
|
|
fn name(&self) -> &str {
|
|
"openvino"
|
|
}
|
|
|
|
fn load(
|
|
&mut self,
|
|
builders: &GraphBuilderArray<'_>,
|
|
target: ExecutionTarget,
|
|
) -> Result<Box<dyn BackendGraph>, BackendError> {
|
|
if builders.len() != 2 {
|
|
return Err(BackendError::InvalidNumberOfBuilders(2, builders.len()).into());
|
|
}
|
|
|
|
// Construct the context if none is present; this is done lazily (i.e.
|
|
// upon actually loading a model) because it may fail to find and load
|
|
// the OpenVINO libraries. The laziness limits the extent of the error
|
|
// only to wasi-nn users, not all WASI users.
|
|
if self.0.is_none() {
|
|
self.0.replace(openvino::Core::new(None)?);
|
|
}
|
|
|
|
// Read the guest array.
|
|
let builders = builders.as_ptr();
|
|
let xml = builders
|
|
.read()?
|
|
.as_slice()?
|
|
.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)");
|
|
let weights = builders
|
|
.add(1)?
|
|
.read()?
|
|
.as_slice()?
|
|
.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)");
|
|
|
|
// Construct OpenVINO graph structures: `cnn_network` contains the graph
|
|
// structure, `exec_network` can perform inference.
|
|
let core = self
|
|
.0
|
|
.as_mut()
|
|
.expect("openvino::Core was previously constructed");
|
|
let mut cnn_network = core.read_network_from_buffer(&xml, &weights)?;
|
|
|
|
// TODO this is a temporary workaround. We need a more eligant way to specify the layout in the long run.
|
|
// However, without this newer versions of OpenVINO will fail due to parameter mismatch.
|
|
for i in 0..cnn_network.get_inputs_len()? {
|
|
let name = cnn_network.get_input_name(i)?;
|
|
cnn_network.set_input_layout(&name, Layout::NHWC)?;
|
|
}
|
|
|
|
let exec_network =
|
|
core.load_network(&cnn_network, map_execution_target_to_string(target))?;
|
|
|
|
Ok(Box::new(OpenvinoGraph(Arc::new(cnn_network), exec_network)))
|
|
}
|
|
}
|
|
|
|
/// A loaded OpenVINO model: the network description (shared with each
/// execution context via `Arc`) plus the compiled `ExecutableNetwork`.
struct OpenvinoGraph(Arc<openvino::CNNNetwork>, openvino::ExecutableNetwork);
|
|
|
|
impl BackendGraph for OpenvinoGraph {
|
|
fn init_execution_context(&mut self) -> Result<Box<dyn BackendExecutionContext>, BackendError> {
|
|
let infer_request = self.1.create_infer_request()?;
|
|
Ok(Box::new(OpenvinoExecutionContext(
|
|
self.0.clone(),
|
|
infer_request,
|
|
)))
|
|
}
|
|
}
|
|
|
|
/// One inference session: the shared network description (used to resolve
/// input/output names by index) and the OpenVINO request that holds the
/// blobs and runs inference.
struct OpenvinoExecutionContext(Arc<openvino::CNNNetwork>, openvino::InferRequest);
|
|
|
|
impl BackendExecutionContext for OpenvinoExecutionContext {
|
|
fn set_input(&mut self, index: u32, tensor: &Tensor<'_>) -> Result<(), BackendError> {
|
|
let input_name = self.0.get_input_name(index as usize)?;
|
|
|
|
// Construct the blob structure.
|
|
let dimensions = tensor
|
|
.dimensions
|
|
.as_slice()?
|
|
.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)")
|
|
.iter()
|
|
.map(|d| *d as usize)
|
|
.collect::<Vec<_>>();
|
|
let precision = map_tensor_type_to_precision(tensor.type_);
|
|
|
|
// TODO There must be some good way to discover the layout here; this
|
|
// should not have to default to NHWC.
|
|
let desc = TensorDesc::new(Layout::NHWC, &dimensions, precision);
|
|
let data = tensor
|
|
.data
|
|
.as_slice()?
|
|
.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)");
|
|
let blob = openvino::Blob::new(&desc, &data)?;
|
|
|
|
// Actually assign the blob to the request.
|
|
self.1.set_blob(&input_name, &blob)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn compute(&mut self) -> Result<(), BackendError> {
|
|
self.1.infer()?;
|
|
Ok(())
|
|
}
|
|
|
|
fn get_output(&mut self, index: u32, destination: &mut [u8]) -> Result<u32, BackendError> {
|
|
let output_name = self.0.get_output_name(index as usize)?;
|
|
let mut blob = self.1.get_blob(&output_name)?;
|
|
let blob_size = blob.byte_len()?;
|
|
if blob_size > destination.len() {
|
|
return Err(BackendError::NotEnoughMemory(blob_size));
|
|
}
|
|
|
|
// Copy the tensor data into the destination buffer.
|
|
destination[..blob_size].copy_from_slice(blob.buffer()?);
|
|
Ok(blob_size as u32)
|
|
}
|
|
}
|
|
|
|
impl From<InferenceError> for BackendError {
|
|
fn from(e: InferenceError) -> Self {
|
|
BackendError::BackendAccess(anyhow::Error::new(e))
|
|
}
|
|
}
|
|
|
|
impl From<SetupError> for BackendError {
|
|
fn from(e: SetupError) -> Self {
|
|
BackendError::BackendAccess(anyhow::Error::new(e))
|
|
}
|
|
}
|
|
|
|
/// Return the execution target string expected by OpenVINO from the
|
|
/// `ExecutionTarget` enum provided by wasi-nn.
|
|
fn map_execution_target_to_string(target: ExecutionTarget) -> &'static str {
|
|
match target {
|
|
ExecutionTarget::Cpu => "CPU",
|
|
ExecutionTarget::Gpu => "GPU",
|
|
ExecutionTarget::Tpu => unimplemented!("OpenVINO does not support TPU execution targets"),
|
|
}
|
|
}
|
|
|
|
/// Return OpenVINO's precision type for the `TensorType` enum provided by
|
|
/// wasi-nn.
|
|
fn map_tensor_type_to_precision(tensor_type: TensorType) -> openvino::Precision {
|
|
match tensor_type {
|
|
TensorType::F16 => Precision::FP16,
|
|
TensorType::F32 => Precision::FP32,
|
|
TensorType::U8 => Precision::U8,
|
|
TensorType::I32 => Precision::I32,
|
|
}
|
|
}
|