* Add basic GuestString support to wiggle This commit adds basic `GuestString` support to `wiggle`. `GuestString` is a wrapper around `GuestArray<'_, u8>` array type which itself can be made into either an owned (cloned) Rust `String` or borrowed as a reference `&str`. In both cases, `GuestString` ensures that the underlying bytes are valid Unicode code units, throwing an `InvalidUtf8` error if not. This commit adds support *only* for passing in strings as arguments in WASI. Marshalling of the return arg has not yet been implemented. I'm not even sure it's possible without the multi-value return args feature of Wasm. It's not a major setback especially since the WASI spec (and this includes even the `ephemeral` snapshot) doesn't return strings anywhere. They are only ever passed in as arguments to interface functions. It should be noted that the error returned in case of invalid UTF-8 requires a lot more love as it doesn't include anything besides flagging an event that the string contained an invalid Unicode code unit. * Borrow all of string's memory including nul-byte Borrow all of string's underlying memory including the nul-byte. This perhaps might not have a tremendous impact on anything, but since the nul-byte is technically part of the WASI string, we should include it in the borrow as well. * Fill in wiggle-generate blanks for strings * Print to screen passed string in proptest * Strings are PointerLengthPairs! * Fix generation of strings in compound types * Update test with simple string strategy * Generate better test strings * Finalise proptest for strings * Fix formatting * Update crates/runtime/src/memory/string.rs Removes unnecessary comment in code * Apply Pat's suggestion to wrap Utf8Error as error
125 lines
3.9 KiB
Rust
125 lines
3.9 KiB
Rust
use super::array::{GuestArray, GuestArrayRef};
|
|
use crate::GuestError;
|
|
use std::fmt;
|
|
|
|
pub struct GuestString<'a> {
|
|
pub(super) array: GuestArray<'a, u8>,
|
|
}
|
|
|
|
impl<'a> fmt::Debug for GuestString<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "GuestString {{ array: {:?} }}", self.array)
|
|
}
|
|
}
|
|
|
|
impl<'a> GuestString<'a> {
|
|
pub fn as_ref(&self) -> Result<GuestStringRef<'a>, GuestError> {
|
|
let ref_ = self.array.as_ref()?;
|
|
Ok(GuestStringRef { ref_ })
|
|
}
|
|
|
|
pub fn to_string(&self) -> Result<String, GuestError> {
|
|
Ok(self.as_ref()?.as_str()?.to_owned())
|
|
}
|
|
}
|
|
|
|
impl<'a> From<GuestArray<'a, u8>> for GuestString<'a> {
|
|
fn from(array: GuestArray<'a, u8>) -> Self {
|
|
Self { array }
|
|
}
|
|
}
|
|
|
|
pub struct GuestStringRef<'a> {
|
|
pub(super) ref_: GuestArrayRef<'a, u8>,
|
|
}
|
|
|
|
impl<'a> fmt::Debug for GuestStringRef<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "GuestStringRef {{ ref_: {:?} }}", self.ref_)
|
|
}
|
|
}
|
|
|
|
impl<'a> GuestStringRef<'a> {
|
|
pub fn as_str(&self) -> Result<&str, GuestError> {
|
|
std::str::from_utf8(&*self.ref_).map_err(Into::into)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::{
        super::{
            ptr::{GuestPtr, GuestPtrMut},
            GuestError, GuestMemory,
        },
        GuestString,
    };

    /// Zero-initialised 4096-byte buffer, aligned to 4096 bytes, that backs
    /// the `GuestMemory` used by the tests in this module.
    #[repr(align(4096))]
    struct HostMemory {
        buffer: [u8; 4096],
    }

    impl HostMemory {
        /// Creates a buffer with every byte set to zero.
        pub fn new() -> Self {
            Self { buffer: [0; 4096] }
        }

        /// Raw mutable pointer to the first byte of the buffer.
        pub fn as_mut_ptr(&mut self) -> *mut u8 {
            self.buffer.as_mut_ptr()
        }

        /// Size of the buffer in bytes.
        pub fn len(&self) -> usize {
            self.buffer.len()
        }
    }

    /// A well-formed UTF-8 string written into guest memory must read back
    /// unchanged through `GuestString`.
    #[test]
    fn valid_utf8() {
        let mut host_memory = HostMemory::new();
        let guest_memory = GuestMemory::new(host_memory.as_mut_ptr(), host_memory.len() as u32);
        // write string into memory, one byte at a time
        let mut ptr: GuestPtrMut<u8> = guest_memory.ptr_mut(0).expect("ptr mut to start of string");
        let input_str = "cześć WASI!";
        for byte in input_str.as_bytes() {
            let mut ref_mut = ptr.as_ref_mut().expect("valid deref");
            *ref_mut = *byte;
            ptr = ptr.elem(1).expect("next ptr");
        }
        // read the string as GuestString; the string is described by a
        // pointer plus an explicit length, not by a terminator byte
        let ptr: GuestPtr<u8> = guest_memory.ptr(0).expect("ptr to start of string");
        let guest_string: GuestString<'_> = ptr
            .array(input_str.len() as u32)
            .expect("valid array of string bytes")
            .into();
        let as_ref = guest_string.as_ref().expect("deref");
        assert_eq!(as_ref.as_str().expect("valid UTF-8"), input_str);
    }

    /// A byte sequence containing an invalid UTF-8 byte must be rejected with
    /// `GuestError::InvalidUtf8`.
    #[test]
    fn invalid_utf8() {
        let mut host_memory = HostMemory::new();
        let guest_memory = GuestMemory::new(host_memory.as_mut_ptr(), host_memory.len() as u32);
        // write string into memory, one byte at a time
        let mut ptr: GuestPtrMut<u8> = guest_memory.ptr_mut(0).expect("ptr mut to start of string");
        let input_str = "cześć WASI!";
        let mut bytes = input_str.as_bytes().to_vec();
        // insert 0xFE which is an invalid UTF-8 byte
        bytes[5] = 0xfe;
        for byte in &bytes {
            let mut ref_mut = ptr.as_ref_mut().expect("valid deref");
            *ref_mut = *byte;
            ptr = ptr.elem(1).expect("next ptr");
        }
        // read the string as GuestString; the string is described by a
        // pointer plus an explicit length, not by a terminator byte
        let ptr: GuestPtr<u8> = guest_memory.ptr(0).expect("ptr to start of string");
        let guest_string: GuestString<'_> = ptr
            .array(bytes.len() as u32)
            .expect("valid array of string bytes")
            .into();
        let as_ref = guest_string.as_ref().expect("deref");
        match as_ref.as_str().expect_err("should fail") {
            GuestError::InvalidUtf8(_) => {}
            // Fail explicitly rather than asserting on a constant `false`.
            x => panic!("expected GuestError::InvalidUtf8(_), got {:?}", x),
        }
    }
}
|