Implement char type in adapter fusion (#4544)
This commit implements the translation of `char`, which validates that the value is a valid Unicode scalar value. The precise validation sequence is lifted from LLVM's output in the hope that it is better than whatever I would concoct by hand.
This commit is contained in:
@@ -51,6 +51,7 @@ enum ValType {
|
||||
S64,
|
||||
Float32,
|
||||
Float64,
|
||||
Char,
|
||||
Record(Vec<ValType>),
|
||||
Tuple(Vec<ValType>),
|
||||
Variant(NonZeroLenVec<ValType>),
|
||||
@@ -206,6 +207,7 @@ fn intern(types: &mut ComponentTypesBuilder, ty: &ValType) -> InterfaceType {
|
||||
ValType::S64 => InterfaceType::S64,
|
||||
ValType::Float32 => InterfaceType::Float32,
|
||||
ValType::Float64 => InterfaceType::Float64,
|
||||
ValType::Char => InterfaceType::Char,
|
||||
ValType::Record(tys) => {
|
||||
let ty = TypeRecord {
|
||||
fields: tys
|
||||
|
||||
@@ -346,6 +346,7 @@ impl Compiler<'_, '_> {
|
||||
InterfaceType::S64 => self.translate_s64(src, dst_ty, dst),
|
||||
InterfaceType::Float32 => self.translate_f32(src, dst_ty, dst),
|
||||
InterfaceType::Float64 => self.translate_f64(src, dst_ty, dst),
|
||||
InterfaceType::Char => self.translate_char(src, dst_ty, dst),
|
||||
InterfaceType::Record(t) => self.translate_record(*t, src, dst_ty, dst),
|
||||
InterfaceType::Tuple(t) => self.translate_tuple(*t, src, dst_ty, dst),
|
||||
InterfaceType::Variant(v) => self.translate_variant(*v, src, dst_ty, dst),
|
||||
@@ -544,6 +545,58 @@ impl Compiler<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits code that moves a `char` from `src` to `dst`, validating it on the way.
///
/// The raw 32-bit value is loaded from `src` (memory or stack), saved in a
/// fresh `i32` local, checked against the canonical ABI rules quoted in the
/// body, and then stored to `dst` (memory or stack). If the check fails the
/// generated code reaches `Trap::InvalidChar` instead of performing the store.
///
/// `dst_ty` must be `InterfaceType::Char`; anything else fails the assertion
/// at the top of this function while the adapter is being generated.
fn translate_char(&mut self, src: &Source<'_>, dst_ty: &InterfaceType, dst: &Destination) {
|
||||
assert!(matches!(dst_ty, InterfaceType::Char));
|
||||
// The value is needed more than once (twice for validation, once for the
// final store), so it is parked in a local rather than kept on the stack.
let local = self.gen_local(ValType::I32);
|
||||
match src {
|
||||
Source::Memory(mem) => self.i32_load(mem),
|
||||
Source::Stack(stack) => self.stack_get(stack, ValType::I32),
|
||||
}
|
||||
self.instruction(LocalSet(local));
|
||||
|
||||
// This sequence is copied from the output of LLVM for:
|
||||
//
|
||||
// pub extern "C" fn foo(x: u32) -> char {
|
||||
// char::try_from(x)
|
||||
// .unwrap_or_else(|_| std::arch::wasm32::unreachable())
|
||||
// }
|
||||
|
||||
//
|
||||
// Apparently this does what's required by the canonical ABI:
|
||||
|
||||
//
|
||||
// def i32_to_char(opts, i):
|
||||
// trap_if(i >= 0x110000)
|
||||
|
||||
// trap_if(0xD800 <= i <= 0xDFFF)
|
||||
// return chr(i)
|
||||
|
||||
//
|
||||
|
||||
// How the constants below implement those two checks (worked out by hand
// from the instruction sequence; the sequence itself is still LLVM's):
//
// * Let `y = x ^ 0xd800`. The XOR only flips bits inside the low 16 bits,
//   so `y >= 0x110000` exactly when `x >= 0x110000`, and `y < 0x800`
//   exactly when `x` is in the surrogate range `0xd800..=0xdfff`.
// * `y + (-0x110000)` is a wrapping subtraction. When `y >= 0x110000` the
//   result is at most `0xffeeffff`; when `y < 0x110000` it wraps around to
//   `0xffef0000 + y`.
// * `-0x10f800` is `0xffef0800` when viewed as unsigned, so the `I32LtU`
//   comparison is true when `y >= 0x110000` (out of range) or when
//   `y < 0x800` (surrogate). In both cases `BrIf(0)` leaves the inner
//   block and lands on the trap.
// * Otherwise the value is valid; `x != 0x110000` then holds and `BrIf(1)`
//   leaves the outer block, skipping the trap. The value `0x110000` is
//   already rejected by the first comparison, so this second test looks
//   redundant -- presumably an artifact of how the Rust snippet above was
//   lowered (TODO confirm); it is kept to match LLVM's output exactly.
|
||||
self.instruction(Block(BlockType::Empty));
|
||||
self.instruction(Block(BlockType::Empty));
|
||||
self.instruction(LocalGet(local));
|
||||
self.instruction(I32Const(0xd800));
|
||||
self.instruction(I32Xor);
|
||||
self.instruction(I32Const(-0x110000));
|
||||
self.instruction(I32Add);
|
||||
self.instruction(I32Const(-0x10f800));
|
||||
self.instruction(I32LtU);
|
||||
self.instruction(BrIf(0));
|
||||
self.instruction(LocalGet(local));
|
||||
self.instruction(I32Const(0x110000));
|
||||
self.instruction(I32Ne);
|
||||
self.instruction(BrIf(1));
|
||||
self.instruction(End);
|
||||
self.trap(Trap::InvalidChar);
|
||||
self.instruction(End);
|
||||
|
||||
// Validation passed: write the saved value to the destination.
// NOTE(review): `push_dst_addr` is not visible here; presumably it pushes
// the store address when `dst` is memory -- confirm against its definition.
self.push_dst_addr(dst);
|
||||
self.instruction(LocalGet(local));
|
||||
match dst {
|
||||
Destination::Memory(mem) => {
|
||||
self.i32_store(mem);
|
||||
}
|
||||
Destination::Stack(stack) => self.stack_set(stack, ValType::I32),
|
||||
}
|
||||
}
|
||||
|
||||
fn translate_record(
|
||||
&mut self,
|
||||
src_ty: TypeRecordIndex,
|
||||
|
||||
@@ -28,6 +28,7 @@ pub enum Trap {
|
||||
CannotEnter,
|
||||
UnalignedPointer,
|
||||
InvalidDiscriminant,
|
||||
InvalidChar,
|
||||
AssertFailed(&'static str),
|
||||
}
|
||||
|
||||
@@ -101,6 +102,7 @@ impl fmt::Display for Trap {
|
||||
Trap::CannotEnter => "cannot enter instance".fmt(f),
|
||||
Trap::UnalignedPointer => "pointer not aligned correctly".fmt(f),
|
||||
Trap::InvalidDiscriminant => "invalid variant discriminant".fmt(f),
|
||||
Trap::InvalidChar => "invalid char value specified".fmt(f),
|
||||
Trap::AssertFailed(s) => write!(f, "assertion failure: {}", s),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1107,3 +1107,121 @@
|
||||
(instance $c1 (instantiate $c1))
|
||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||
)
|
||||
|
||||
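;; roundtrip some valid chars, including the values on either side of the surrogate range (0xd7ff, 0xe000) and the largest value passed here (0x10ffff)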
|
||||
(component
|
||||
(component $c1
|
||||
(core module $m
|
||||
(func (export "a") (param i32) (result i32) local.get 0)
|
||||
)
|
||||
(core instance $m (instantiate $m))
|
||||
(func (export "a") (param char) (result char) (canon lift (core func $m "a")))
|
||||
)
|
||||
(component $c2
|
||||
(import "" (instance $i
|
||||
(export "a" (func (param char) (result char)))
|
||||
))
|
||||
|
||||
(core func $a (canon lower (func $i "a")))
|
||||
|
||||
(core module $m
|
||||
(import "" "a" (func $a (param i32) (result i32)))
|
||||
|
||||
(func $start
|
||||
(call $roundtrip (i32.const 0))
|
||||
(call $roundtrip (i32.const 0xab))
|
||||
(call $roundtrip (i32.const 0xd7ff))
|
||||
(call $roundtrip (i32.const 0xe000))
|
||||
(call $roundtrip (i32.const 0x10ffff))
|
||||
)
|
||||
(func $roundtrip (export "roundtrip") (param i32)
|
||||
local.get 0
|
||||
call $a
|
||||
local.get 0
|
||||
i32.ne
|
||||
if unreachable end
|
||||
)
|
||||
(start $start)
|
||||
)
|
||||
(core instance $m (instantiate $m
|
||||
(with "" (instance
|
||||
(export "a" (func $a))
|
||||
))
|
||||
))
|
||||
|
||||
(func (export "roundtrip") (param char) (canon lift (core func $m "roundtrip")))
|
||||
)
|
||||
(instance $c1 (instantiate $c1))
|
||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||
|
||||
(export "roundtrip" (func $c2 "roundtrip"))
|
||||
)
|
||||
|
||||
(assert_return (invoke "roundtrip" (char.const "x")) (unit.const))
|
||||
(assert_return (invoke "roundtrip" (char.const "⛳")) (unit.const))
|
||||
(assert_return (invoke "roundtrip" (char.const "🍰")) (unit.const))
|
||||
|
||||
;; invalid chars: each of 0xd800, 0xdfff, and 0x110000 must trap when lowered as a `char`
|
||||
(assert_trap
|
||||
(component
|
||||
(component $c1
|
||||
(core module $m (func (export "a") (param i32)))
|
||||
(core instance $m (instantiate $m))
|
||||
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||
)
|
||||
(component $c2
|
||||
(import "" (instance $i (export "a" (func (param char)))))
|
||||
(core func $a (canon lower (func $i "a")))
|
||||
(core module $m
|
||||
(import "" "a" (func $a (param i32)))
|
||||
(func $start (call $a (i32.const 0xd800)))
|
||||
(start $start)
|
||||
)
|
||||
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||
)
|
||||
(instance $c1 (instantiate $c1))
|
||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||
)
|
||||
"unreachable")
|
||||
(assert_trap
|
||||
(component
|
||||
(component $c1
|
||||
(core module $m (func (export "a") (param i32)))
|
||||
(core instance $m (instantiate $m))
|
||||
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||
)
|
||||
(component $c2
|
||||
(import "" (instance $i (export "a" (func (param char)))))
|
||||
(core func $a (canon lower (func $i "a")))
|
||||
(core module $m
|
||||
(import "" "a" (func $a (param i32)))
|
||||
(func $start (call $a (i32.const 0xdfff)))
|
||||
(start $start)
|
||||
)
|
||||
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||
)
|
||||
(instance $c1 (instantiate $c1))
|
||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||
)
|
||||
"unreachable")
|
||||
(assert_trap
|
||||
(component
|
||||
(component $c1
|
||||
(core module $m (func (export "a") (param i32)))
|
||||
(core instance $m (instantiate $m))
|
||||
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||
)
|
||||
(component $c2
|
||||
(import "" (instance $i (export "a" (func (param char)))))
|
||||
(core func $a (canon lower (func $i "a")))
|
||||
(core module $m
|
||||
(import "" "a" (func $a (param i32)))
|
||||
(func $start (call $a (i32.const 0x110000)))
|
||||
(start $start)
|
||||
)
|
||||
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||
)
|
||||
(instance $c1 (instantiate $c1))
|
||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||
)
|
||||
"unreachable")
|
||||
|
||||
Reference in New Issue
Block a user