Implement char type in adapter fusion (#4544)
This commit implements the translation of `char` in adapter fusion, validating that the value lies within the range of valid Unicode scalar values. The precise validation sequence is lifted from LLVM's output, in the hope that it is better than whatever I would concoct by hand.
This commit is contained in:
@@ -51,6 +51,7 @@ enum ValType {
|
|||||||
S64,
|
S64,
|
||||||
Float32,
|
Float32,
|
||||||
Float64,
|
Float64,
|
||||||
|
Char,
|
||||||
Record(Vec<ValType>),
|
Record(Vec<ValType>),
|
||||||
Tuple(Vec<ValType>),
|
Tuple(Vec<ValType>),
|
||||||
Variant(NonZeroLenVec<ValType>),
|
Variant(NonZeroLenVec<ValType>),
|
||||||
@@ -206,6 +207,7 @@ fn intern(types: &mut ComponentTypesBuilder, ty: &ValType) -> InterfaceType {
|
|||||||
ValType::S64 => InterfaceType::S64,
|
ValType::S64 => InterfaceType::S64,
|
||||||
ValType::Float32 => InterfaceType::Float32,
|
ValType::Float32 => InterfaceType::Float32,
|
||||||
ValType::Float64 => InterfaceType::Float64,
|
ValType::Float64 => InterfaceType::Float64,
|
||||||
|
ValType::Char => InterfaceType::Char,
|
||||||
ValType::Record(tys) => {
|
ValType::Record(tys) => {
|
||||||
let ty = TypeRecord {
|
let ty = TypeRecord {
|
||||||
fields: tys
|
fields: tys
|
||||||
|
|||||||
@@ -346,6 +346,7 @@ impl Compiler<'_, '_> {
|
|||||||
InterfaceType::S64 => self.translate_s64(src, dst_ty, dst),
|
InterfaceType::S64 => self.translate_s64(src, dst_ty, dst),
|
||||||
InterfaceType::Float32 => self.translate_f32(src, dst_ty, dst),
|
InterfaceType::Float32 => self.translate_f32(src, dst_ty, dst),
|
||||||
InterfaceType::Float64 => self.translate_f64(src, dst_ty, dst),
|
InterfaceType::Float64 => self.translate_f64(src, dst_ty, dst),
|
||||||
|
InterfaceType::Char => self.translate_char(src, dst_ty, dst),
|
||||||
InterfaceType::Record(t) => self.translate_record(*t, src, dst_ty, dst),
|
InterfaceType::Record(t) => self.translate_record(*t, src, dst_ty, dst),
|
||||||
InterfaceType::Tuple(t) => self.translate_tuple(*t, src, dst_ty, dst),
|
InterfaceType::Tuple(t) => self.translate_tuple(*t, src, dst_ty, dst),
|
||||||
InterfaceType::Variant(v) => self.translate_variant(*v, src, dst_ty, dst),
|
InterfaceType::Variant(v) => self.translate_variant(*v, src, dst_ty, dst),
|
||||||
@@ -544,6 +545,58 @@ impl Compiler<'_, '_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits the instructions that translate a `char` from `src` to `dst`.
///
/// The source value is read as an `i32` (from memory or from the stack),
/// saved in a freshly generated local, validated, and then written to `dst`
/// as an `i32`. When validation fails the emitted code reaches
/// `Trap::InvalidChar`.
///
/// Panics (via `assert!`) if `dst_ty` is not `InterfaceType::Char`.
fn translate_char(&mut self, src: &Source<'_>, dst_ty: &InterfaceType, dst: &Destination) {
|
||||||
|
assert!(matches!(dst_ty, InterfaceType::Char));
|
||||||
|
// Temporary holding the raw value: it is read twice by the validation
// below and once more by the final store.
let local = self.gen_local(ValType::I32);
|
||||||
|
match src {
|
||||||
|
Source::Memory(mem) => self.i32_load(mem),
|
||||||
|
Source::Stack(stack) => self.stack_get(stack, ValType::I32),
|
||||||
|
}
|
||||||
|
self.instruction(LocalSet(local));
|
||||||
|
|
||||||
|
// This sequence is copied from the output of LLVM for:
|
||||||
|
//
|
||||||
|
// pub extern "C" fn foo(x: u32) -> char {
|
||||||
|
// char::try_from(x)
|
||||||
|
// .unwrap_or_else(|_| std::arch::wasm32::unreachable())
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// It implements what's required by the canonical ABI:
|
||||||
|
//
|
||||||
|
// def i32_to_char(opts, i):
|
||||||
|
// trap_if(i >= 0x110000)
|
||||||
|
// trap_if(0xD800 <= i <= 0xDFFF)
|
||||||
|
// return chr(i)
|
||||||
|
//
|
||||||
|
// How it works:
//
// * `i ^ 0xd800` only touches bits below bit 16, so it maps the
//   surrogates 0xd800..=0xdfff onto 0..=0x7ff and every other value
//   below 0x110000 onto 0x800..=0x10ffff; values >= 0x110000 stay
//   >= 0x110000.
// * Adding -0x110000 (wrapping) therefore moves exactly the valid
//   values into 0xffef0800..=0xffffffff.
// * -0x10f800 is 0xffef0800 when viewed as unsigned, so the unsigned
//   `<` below is true precisely for the invalid values, and `br_if 0`
//   leaves the inner block to fall into the trap.
|
||||||
|
self.instruction(Block(BlockType::Empty));
|
||||||
|
self.instruction(Block(BlockType::Empty));
|
||||||
|
self.instruction(LocalGet(local));
|
||||||
|
self.instruction(I32Const(0xd800));
|
||||||
|
self.instruction(I32Xor);
|
||||||
|
self.instruction(I32Const(-0x110000));
|
||||||
|
self.instruction(I32Add);
|
||||||
|
self.instruction(I32Const(-0x10f800));
|
||||||
|
self.instruction(I32LtU);
|
||||||
|
self.instruction(BrIf(0));
|
||||||
|
// Only reached by values that passed the range check above, none of
// which equals 0x110000, so this branch past the trap is always taken.
// NOTE(review): presumably a leftover of how LLVM lowers the
// `char::try_from` result -- confirm before simplifying.
self.instruction(LocalGet(local));
|
||||||
|
self.instruction(I32Const(0x110000));
|
||||||
|
self.instruction(I32Ne);
|
||||||
|
self.instruction(BrIf(1));
|
||||||
|
self.instruction(End);
|
||||||
|
// Target of `br_if 0`: the value is not a valid char.
self.trap(Trap::InvalidChar);
|
||||||
|
// Target of `br_if 1`: the value is valid.
self.instruction(End);
|
||||||
|
|
||||||
|
// Write the validated value to the destination.
// NOTE(review): `push_dst_addr` presumably pushes the address operand
// needed by the memory store -- confirm against its definition.
self.push_dst_addr(dst);
|
||||||
|
self.instruction(LocalGet(local));
|
||||||
|
match dst {
|
||||||
|
Destination::Memory(mem) => {
|
||||||
|
self.i32_store(mem);
|
||||||
|
}
|
||||||
|
Destination::Stack(stack) => self.stack_set(stack, ValType::I32),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn translate_record(
|
fn translate_record(
|
||||||
&mut self,
|
&mut self,
|
||||||
src_ty: TypeRecordIndex,
|
src_ty: TypeRecordIndex,
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ pub enum Trap {
|
|||||||
CannotEnter,
|
CannotEnter,
|
||||||
UnalignedPointer,
|
UnalignedPointer,
|
||||||
InvalidDiscriminant,
|
InvalidDiscriminant,
|
||||||
|
InvalidChar,
|
||||||
AssertFailed(&'static str),
|
AssertFailed(&'static str),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -101,6 +102,7 @@ impl fmt::Display for Trap {
|
|||||||
Trap::CannotEnter => "cannot enter instance".fmt(f),
|
Trap::CannotEnter => "cannot enter instance".fmt(f),
|
||||||
Trap::UnalignedPointer => "pointer not aligned correctly".fmt(f),
|
Trap::UnalignedPointer => "pointer not aligned correctly".fmt(f),
|
||||||
Trap::InvalidDiscriminant => "invalid variant discriminant".fmt(f),
|
Trap::InvalidDiscriminant => "invalid variant discriminant".fmt(f),
|
||||||
|
Trap::InvalidChar => "invalid char value specified".fmt(f),
|
||||||
Trap::AssertFailed(s) => write!(f, "assertion failure: {}", s),
|
Trap::AssertFailed(s) => write!(f, "assertion failure: {}", s),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1107,3 +1107,121 @@
|
|||||||
(instance $c1 (instantiate $c1))
|
(instance $c1 (instantiate $c1))
|
||||||
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; roundtrip some valid chars, including the values bordering the surrogate range (0xd7ff, 0xe000) and the maximum 0x10ffff
|
||||||
|
(component
|
||||||
|
(component $c1
|
||||||
|
(core module $m
|
||||||
|
(func (export "a") (param i32) (result i32) local.get 0)
|
||||||
|
)
|
||||||
|
(core instance $m (instantiate $m))
|
||||||
|
(func (export "a") (param char) (result char) (canon lift (core func $m "a")))
|
||||||
|
)
|
||||||
|
(component $c2
|
||||||
|
(import "" (instance $i
|
||||||
|
(export "a" (func (param char) (result char)))
|
||||||
|
))
|
||||||
|
|
||||||
|
(core func $a (canon lower (func $i "a")))
|
||||||
|
|
||||||
|
(core module $m
|
||||||
|
(import "" "a" (func $a (param i32) (result i32)))
|
||||||
|
|
||||||
|
(func $start
|
||||||
|
(call $roundtrip (i32.const 0))
|
||||||
|
(call $roundtrip (i32.const 0xab))
|
||||||
|
(call $roundtrip (i32.const 0xd7ff))
|
||||||
|
(call $roundtrip (i32.const 0xe000))
|
||||||
|
(call $roundtrip (i32.const 0x10ffff))
|
||||||
|
)
|
||||||
|
(func $roundtrip (export "roundtrip") (param i32)
|
||||||
|
local.get 0
|
||||||
|
call $a
|
||||||
|
local.get 0
|
||||||
|
i32.ne
|
||||||
|
if unreachable end
|
||||||
|
)
|
||||||
|
(start $start)
|
||||||
|
)
|
||||||
|
(core instance $m (instantiate $m
|
||||||
|
(with "" (instance
|
||||||
|
(export "a" (func $a))
|
||||||
|
))
|
||||||
|
))
|
||||||
|
|
||||||
|
(func (export "roundtrip") (param char) (canon lift (core func $m "roundtrip")))
|
||||||
|
)
|
||||||
|
(instance $c1 (instantiate $c1))
|
||||||
|
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||||
|
|
||||||
|
(export "roundtrip" (func $c2 "roundtrip"))
|
||||||
|
)
|
||||||
|
|
||||||
|
(assert_return (invoke "roundtrip" (char.const "x")) (unit.const))
|
||||||
|
(assert_return (invoke "roundtrip" (char.const "⛳")) (unit.const))
|
||||||
|
(assert_return (invoke "roundtrip" (char.const "🍰")) (unit.const))
|
||||||
|
|
||||||
|
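;; invalid chars: both ends of the surrogate range (0xd800, 0xdfff) and the first value past the valid range (0x110000) must trap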
|
||||||
|
(assert_trap
|
||||||
|
(component
|
||||||
|
(component $c1
|
||||||
|
(core module $m (func (export "a") (param i32)))
|
||||||
|
(core instance $m (instantiate $m))
|
||||||
|
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||||
|
)
|
||||||
|
(component $c2
|
||||||
|
(import "" (instance $i (export "a" (func (param char)))))
|
||||||
|
(core func $a (canon lower (func $i "a")))
|
||||||
|
(core module $m
|
||||||
|
(import "" "a" (func $a (param i32)))
|
||||||
|
(func $start (call $a (i32.const 0xd800)))
|
||||||
|
(start $start)
|
||||||
|
)
|
||||||
|
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||||
|
)
|
||||||
|
(instance $c1 (instantiate $c1))
|
||||||
|
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||||
|
)
|
||||||
|
"unreachable")
|
||||||
|
(assert_trap
|
||||||
|
(component
|
||||||
|
(component $c1
|
||||||
|
(core module $m (func (export "a") (param i32)))
|
||||||
|
(core instance $m (instantiate $m))
|
||||||
|
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||||
|
)
|
||||||
|
(component $c2
|
||||||
|
(import "" (instance $i (export "a" (func (param char)))))
|
||||||
|
(core func $a (canon lower (func $i "a")))
|
||||||
|
(core module $m
|
||||||
|
(import "" "a" (func $a (param i32)))
|
||||||
|
(func $start (call $a (i32.const 0xdfff)))
|
||||||
|
(start $start)
|
||||||
|
)
|
||||||
|
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||||
|
)
|
||||||
|
(instance $c1 (instantiate $c1))
|
||||||
|
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||||
|
)
|
||||||
|
"unreachable")
|
||||||
|
(assert_trap
|
||||||
|
(component
|
||||||
|
(component $c1
|
||||||
|
(core module $m (func (export "a") (param i32)))
|
||||||
|
(core instance $m (instantiate $m))
|
||||||
|
(func (export "a") (param char) (canon lift (core func $m "a")))
|
||||||
|
)
|
||||||
|
(component $c2
|
||||||
|
(import "" (instance $i (export "a" (func (param char)))))
|
||||||
|
(core func $a (canon lower (func $i "a")))
|
||||||
|
(core module $m
|
||||||
|
(import "" "a" (func $a (param i32)))
|
||||||
|
(func $start (call $a (i32.const 0x110000)))
|
||||||
|
(start $start)
|
||||||
|
)
|
||||||
|
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
|
||||||
|
)
|
||||||
|
(instance $c1 (instantiate $c1))
|
||||||
|
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
|
||||||
|
)
|
||||||
|
"unreachable")
|
||||||
|
|||||||
Reference in New Issue
Block a user