Add basic GuestString support to wiggle (#13)

* Add basic GuestString support to wiggle

This commit adds basic `GuestString` support to `wiggle`. `GuestString`
is a wrapper around the `GuestArray<'_, u8>` array type which can
be converted into either an owned (cloned) Rust `String` or borrowed as
a `&str` reference. In both cases, `GuestString` ensures that the
underlying bytes are valid UTF-8, returning an `InvalidUtf8`
error if not.

This commit adds support *only* for passing in strings as arguments
in WASI. Marshalling of the return arg has not yet been implemented.
I'm not even sure it's possible without the multi-value return
feature of Wasm. This is not a major setback, especially since the WASI
spec (and this includes even the `ephemeral` snapshot) doesn't
return strings anywhere. They are only ever passed in as arguments
to interface functions.

It should be noted that the error returned in case of invalid UTF-8
requires a lot more love, as it doesn't convey anything besides
flagging that the string contained an invalid Unicode code unit.

* Borrow all of string's memory including nul-byte

Borrow all of string's underlying memory including the nul-byte.
This perhaps might not have a tremendous impact on anything, but
since the nul-byte is technically part of the WASI string, we should
include it in the borrow as well.

* Fill in wiggle-generate blanks for strings

* Print to screen passed string in proptest

* Strings are PointerLengthPairs!

* Fix generation of strings in compound types

* Update test with simple string strategy

* Generate better test strings

* Finalise proptest for strings

* Fix formatting

* Update crates/runtime/src/memory/string.rs

Removes unnecessary comment in code

* Apply Pat's suggestion to wrap Utf8Error as error
This commit is contained in:
Jakub Konka
2020-02-21 22:37:22 +01:00
committed by GitHub
parent 2f223acc55
commit 6ab3ff71d2
9 changed files with 273 additions and 13 deletions

View File

@@ -27,4 +27,6 @@ pub enum GuestError {
#[source]
err: Box<GuestError>,
},
#[error("Invalid UTF-8 encountered")]
InvalidUtf8(#[from] std::str::Utf8Error),
}

View File

@@ -6,5 +6,8 @@ mod region;
pub use error::GuestError;
pub use guest_type::{GuestErrorType, GuestType, GuestTypeClone, GuestTypeCopy};
pub use memory::{GuestArray, GuestMemory, GuestPtr, GuestPtrMut, GuestRef, GuestRefMut};
pub use memory::{
GuestArray, GuestMemory, GuestPtr, GuestPtrMut, GuestRef, GuestRefMut, GuestString,
GuestStringRef,
};
pub use region::Region;

View File

@@ -1,8 +1,10 @@
mod array;
mod ptr;
mod string;
pub use array::*;
pub use ptr::*;
pub use string::*;
use crate::{borrow::GuestBorrows, GuestError, GuestType, Region};
use std::{cell::RefCell, fmt, marker::PhantomData, rc::Rc};

View File

@@ -0,0 +1,124 @@
use super::array::{GuestArray, GuestArrayRef};
use crate::GuestError;
use std::fmt;
/// A string living in guest memory, represented as a `GuestArray` of bytes.
///
/// Wrapping an array does not validate its contents; UTF-8 validation happens
/// when the bytes are read through `GuestStringRef::as_str` or `to_string`.
pub struct GuestString<'a> {
// Underlying byte array; accessible from the parent module only.
pub(super) array: GuestArray<'a, u8>,
}
impl<'a> fmt::Debug for GuestString<'a> {
    /// Renders the value as `GuestString { array: .. }`, where the field is
    /// printed with the wrapped array's own `Debug` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = &self.array;
        f.write_fmt(format_args!("GuestString {{ array: {:?} }}", inner))
    }
}
impl<'a> GuestString<'a> {
    /// Borrows the underlying byte array and wraps the borrow in a
    /// `GuestStringRef`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the wrapped array's `as_ref`.
    pub fn as_ref(&self) -> Result<GuestStringRef<'a>, GuestError> {
        let borrowed = self.array.as_ref()?;
        Ok(GuestStringRef { ref_: borrowed })
    }

    /// Copies the string out of guest memory into an owned `String`.
    ///
    /// # Errors
    ///
    /// Fails if the array cannot be borrowed, or if `as_str` rejects the
    /// bytes as invalid UTF-8.
    pub fn to_string(&self) -> Result<String, GuestError> {
        let string_ref = self.as_ref()?;
        let text = string_ref.as_str()?;
        Ok(text.to_owned())
    }
}
impl<'a> From<GuestArray<'a, u8>> for GuestString<'a> {
fn from(array: GuestArray<'a, u8>) -> Self {
Self { array }
}
}
/// A borrowed view of a guest string's bytes, wrapping a `GuestArrayRef<'a, u8>`.
///
/// Produced by `GuestString::as_ref`; call `as_str` to obtain a UTF-8-validated
/// `&str`.
pub struct GuestStringRef<'a> {
// Borrowed byte array; accessible from the parent module only.
pub(super) ref_: GuestArrayRef<'a, u8>,
}
impl<'a> fmt::Debug for GuestStringRef<'a> {
    /// Renders the value as `GuestStringRef { ref_: .. }`, where the field is
    /// printed with the wrapped array reference's own `Debug` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = &self.ref_;
        f.write_fmt(format_args!("GuestStringRef {{ ref_: {:?} }}", inner))
    }
}
impl<'a> GuestStringRef<'a> {
    /// Interprets the borrowed bytes as a UTF-8 string slice.
    ///
    /// # Errors
    ///
    /// Returns the `GuestError` converted from `std::str::Utf8Error` when the
    /// bytes are not valid UTF-8.
    pub fn as_str(&self) -> Result<&str, GuestError> {
        let text = std::str::from_utf8(&*self.ref_)?;
        Ok(text)
    }
}
#[cfg(test)]
mod test {
    use super::{
        super::{
            ptr::{GuestPtr, GuestPtrMut},
            GuestError, GuestMemory,
        },
        GuestString,
    };

    /// Zero-initialised 4096-byte buffer, aligned to 4096 bytes, used as the
    /// backing store for the guest memory in these tests.
    #[repr(align(4096))]
    struct HostMemory {
        buffer: [u8; 4096],
    }

    impl HostMemory {
        pub fn new() -> Self {
            Self { buffer: [0; 4096] }
        }
        pub fn as_mut_ptr(&mut self) -> *mut u8 {
            self.buffer.as_mut_ptr()
        }
        pub fn len(&self) -> usize {
            self.buffer.len()
        }
    }

    /// Bytes that form valid UTF-8 must read back as the same `&str`.
    #[test]
    fn valid_utf8() {
        let mut host_memory = HostMemory::new();
        let guest_memory = GuestMemory::new(host_memory.as_mut_ptr(), host_memory.len() as u32);
        // write string into memory, one byte at a time
        let mut ptr: GuestPtrMut<u8> = guest_memory.ptr_mut(0).expect("ptr mut to start of string");
        let input_str = "cześć WASI!";
        for byte in input_str.as_bytes() {
            let mut ref_mut = ptr.as_ref_mut().expect("valid deref");
            *ref_mut = *byte;
            ptr = ptr.elem(1).expect("next ptr");
        }
        // read the string as GuestString; the array length is the byte length
        // of the input, there is no nul terminator involved
        let ptr: GuestPtr<u8> = guest_memory.ptr(0).expect("ptr to start of string");
        let guest_string: GuestString<'_> = ptr
            .array(input_str.len() as u32)
            .expect("valid array covering the string bytes")
            .into();
        let as_ref = guest_string.as_ref().expect("deref");
        assert_eq!(as_ref.as_str().expect("valid UTF-8"), input_str);
    }

    /// Bytes that are not valid UTF-8 must be rejected with
    /// `GuestError::InvalidUtf8`.
    #[test]
    fn invalid_utf8() {
        let mut host_memory = HostMemory::new();
        let guest_memory = GuestMemory::new(host_memory.as_mut_ptr(), host_memory.len() as u32);
        // write string into memory, one byte at a time
        let mut ptr: GuestPtrMut<u8> = guest_memory.ptr_mut(0).expect("ptr mut to start of string");
        let input_str = "cześć WASI!";
        let mut bytes = input_str.as_bytes().to_vec();
        // overwrite one byte with 0xFE, which never occurs in valid UTF-8
        bytes[5] = 0xfe;
        for byte in &bytes {
            let mut ref_mut = ptr.as_ref_mut().expect("valid deref");
            *ref_mut = *byte;
            ptr = ptr.elem(1).expect("next ptr");
        }
        // read the string as GuestString
        let ptr: GuestPtr<u8> = guest_memory.ptr(0).expect("ptr to start of string");
        let guest_string: GuestString<'_> = ptr
            .array(bytes.len() as u32)
            .expect("valid array covering the string bytes")
            .into();
        let as_ref = guest_string.as_ref().expect("deref");
        match as_ref.as_str().expect_err("should fail") {
            GuestError::InvalidUtf8(_) => {}
            // `panic!` states the intent directly instead of asserting on a
            // constant `false`.
            x => panic!("expected GuestError::InvalidUtf8(_), got {:?}", x),
        }
    }
}