Implement char type in adapter fusion (#4544)

This commit implements the translation of `char`, which validates that
the value is in the valid range of Unicode scalar values. The precise
validation sequence is lifted from LLVM's output, in the hope that it is
better than whatever I would concoct by hand.
This commit is contained in:
Alex Crichton
2022-07-28 11:47:01 -05:00
committed by GitHub
parent 8137432e67
commit e1148e43be
4 changed files with 175 additions and 0 deletions

View File

@@ -51,6 +51,7 @@ enum ValType {
S64, S64,
Float32, Float32,
Float64, Float64,
Char,
Record(Vec<ValType>), Record(Vec<ValType>),
Tuple(Vec<ValType>), Tuple(Vec<ValType>),
Variant(NonZeroLenVec<ValType>), Variant(NonZeroLenVec<ValType>),
@@ -206,6 +207,7 @@ fn intern(types: &mut ComponentTypesBuilder, ty: &ValType) -> InterfaceType {
ValType::S64 => InterfaceType::S64, ValType::S64 => InterfaceType::S64,
ValType::Float32 => InterfaceType::Float32, ValType::Float32 => InterfaceType::Float32,
ValType::Float64 => InterfaceType::Float64, ValType::Float64 => InterfaceType::Float64,
ValType::Char => InterfaceType::Char,
ValType::Record(tys) => { ValType::Record(tys) => {
let ty = TypeRecord { let ty = TypeRecord {
fields: tys fields: tys

View File

@@ -346,6 +346,7 @@ impl Compiler<'_, '_> {
InterfaceType::S64 => self.translate_s64(src, dst_ty, dst), InterfaceType::S64 => self.translate_s64(src, dst_ty, dst),
InterfaceType::Float32 => self.translate_f32(src, dst_ty, dst), InterfaceType::Float32 => self.translate_f32(src, dst_ty, dst),
InterfaceType::Float64 => self.translate_f64(src, dst_ty, dst), InterfaceType::Float64 => self.translate_f64(src, dst_ty, dst),
InterfaceType::Char => self.translate_char(src, dst_ty, dst),
InterfaceType::Record(t) => self.translate_record(*t, src, dst_ty, dst), InterfaceType::Record(t) => self.translate_record(*t, src, dst_ty, dst),
InterfaceType::Tuple(t) => self.translate_tuple(*t, src, dst_ty, dst), InterfaceType::Tuple(t) => self.translate_tuple(*t, src, dst_ty, dst),
InterfaceType::Variant(v) => self.translate_variant(*v, src, dst_ty, dst), InterfaceType::Variant(v) => self.translate_variant(*v, src, dst_ty, dst),
@@ -544,6 +545,58 @@ impl Compiler<'_, '_> {
} }
} }
/// Translates a `char` value from `src` to `dst`, emitting a validity check
/// between the read and the write.
///
/// The value is handled as an `i32`: it is read from memory or from the
/// stack into a freshly generated `i32` local, checked, and then written to
/// memory or to the stack. Values that are not Unicode scalar values are
/// routed to the `self.trap(Trap::InvalidChar)` sequence.
///
/// Panics if `dst_ty` is not `InterfaceType::Char`.
fn translate_char(&mut self, src: &Source<'_>, dst_ty: &InterfaceType, dst: &Destination) {
assert!(matches!(dst_ty, InterfaceType::Char));
// Keep the incoming value in a local: the check below reads it twice and
// the final store reads it once more.
let local = self.gen_local(ValType::I32);
match src {
Source::Memory(mem) => self.i32_load(mem),
Source::Stack(stack) => self.stack_get(stack, ValType::I32),
}
self.instruction(LocalSet(local));
// This sequence is copied from the output of LLVM for:
//
//      pub extern "C" fn foo(x: u32) -> char {
//          char::try_from(x)
//              .unwrap_or_else(|_| std::arch::wasm32::unreachable())
//      }
//
// It implements what is required by the canonical ABI:
//
//      def i32_to_char(opts, i):
//          trap_if(i >= 0x110000)
//          trap_if(0xD800 <= i <= 0xDFFF)
//          return chr(i)
//
// How the first test works (all arithmetic wraps modulo 2^32):
//
// * `i ^ 0xd800` only flips bits below bit 16. It maps the surrogate
//   range 0xD800..=0xDFFF onto 0x0000..=0x07FF, maps every other value
//   below 0x10000 into 0x0800..=0xFFFF, and leaves bits 16 and up alone,
//   so `i >= 0x110000` holds exactly when the xor'd value is >= 0x110000.
// * Adding -0x110000 and then testing "unsigned less than -0x10f800"
//   (0xFFEF0800) is a single range check: the comparison is false only
//   when the xor'd value lies in 0x800..=0x10FFFF. Combined with the
//   previous point, it is true exactly for surrogates and for values
//   >= 0x110000, i.e. for every invalid `char`.
//
// Control flow: `BrIf(0)` leaves only the inner block and therefore lands
// on the trap; `BrIf(1)` leaves the outer block and therefore skips the
// trap; falling off the end of the inner block also reaches the trap.
self.instruction(Block(BlockType::Empty));
self.instruction(Block(BlockType::Empty));
// Invalid value => branch to the end of the inner block (the trap).
self.instruction(LocalGet(local));
self.instruction(I32Const(0xd800));
self.instruction(I32Xor);
self.instruction(I32Const(-0x110000));
self.instruction(I32Add);
self.instruction(I32Const(-0x10f800));
self.instruction(I32LtU);
self.instruction(BrIf(0));
// Second test from the copied sequence: `i != 0x110000` => skip the trap.
// By the arithmetic above 0x110000 already took the previous branch, so
// this comparison appears to always be true when it is reached.
self.instruction(LocalGet(local));
self.instruction(I32Const(0x110000));
self.instruction(I32Ne);
self.instruction(BrIf(1));
self.instruction(End);
self.trap(Trap::InvalidChar);
self.instruction(End);
// The value is valid at this point: write it to the destination.
// NOTE(review): `push_dst_addr` is defined elsewhere; presumably it pushes
// the address operand needed by a memory store -- confirm.
self.push_dst_addr(dst);
self.instruction(LocalGet(local));
match dst {
Destination::Memory(mem) => {
self.i32_store(mem);
}
Destination::Stack(stack) => self.stack_set(stack, ValType::I32),
}
}
fn translate_record( fn translate_record(
&mut self, &mut self,
src_ty: TypeRecordIndex, src_ty: TypeRecordIndex,

View File

@@ -28,6 +28,7 @@ pub enum Trap {
CannotEnter, CannotEnter,
UnalignedPointer, UnalignedPointer,
InvalidDiscriminant, InvalidDiscriminant,
InvalidChar,
AssertFailed(&'static str), AssertFailed(&'static str),
} }
@@ -101,6 +102,7 @@ impl fmt::Display for Trap {
Trap::CannotEnter => "cannot enter instance".fmt(f), Trap::CannotEnter => "cannot enter instance".fmt(f),
Trap::UnalignedPointer => "pointer not aligned correctly".fmt(f), Trap::UnalignedPointer => "pointer not aligned correctly".fmt(f),
Trap::InvalidDiscriminant => "invalid variant discriminant".fmt(f), Trap::InvalidDiscriminant => "invalid variant discriminant".fmt(f),
Trap::InvalidChar => "invalid char value specified".fmt(f),
Trap::AssertFailed(s) => write!(f, "assertion failure: {}", s), Trap::AssertFailed(s) => write!(f, "assertion failure: {}", s),
} }
} }

View File

@@ -1107,3 +1107,121 @@
(instance $c1 (instantiate $c1)) (instance $c1 (instantiate $c1))
(instance $c2 (instantiate $c2 (with "" (instance $c1)))) (instance $c2 (instantiate $c2 (with "" (instance $c1))))
) )
;; roundtrip some valid chars
;;
;; `$c1` lifts a core identity function as `char -> char`. `$c2` imports and
;; lowers that function, so the `char` argument and the `char` result of
;; every call to `$a` pass between the two components.
(component
(component $c1
(core module $m
;; identity: returns its i32 argument unchanged
(func (export "a") (param i32) (result i32) local.get 0)
)
(core instance $m (instantiate $m))
(func (export "a") (param char) (result char) (canon lift (core func $m "a")))
)
(component $c2
(import "" (instance $i
(export "a" (func (param char) (result char)))
))
(core func $a (canon lower (func $i "a")))
(core module $m
(import "" "a" (func $a (param i32) (result i32)))
;; Runs at instantiation: zero, a small value, the values immediately
;; below (0xd7ff) and above (0xe000) the surrogate range, and the largest
;; Unicode scalar value (0x10ffff).
(func $start
(call $roundtrip (i32.const 0))
(call $roundtrip (i32.const 0xab))
(call $roundtrip (i32.const 0xd7ff))
(call $roundtrip (i32.const 0xe000))
(call $roundtrip (i32.const 0x10ffff))
)
;; Calls `$a` with the argument and executes `unreachable` if the returned
;; value differs from it.
(func $roundtrip (export "roundtrip") (param i32)
local.get 0
call $a
local.get 0
i32.ne
if unreachable end
)
(start $start)
)
(core instance $m (instantiate $m
(with "" (instance
(export "a" (func $a))
))
))
;; Lift `roundtrip` so that it can also be invoked with `char` arguments
;; from outside the component.
(func (export "roundtrip") (param char) (canon lift (core func $m "roundtrip")))
)
(instance $c1 (instantiate $c1))
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
(export "roundtrip" (func $c2 "roundtrip"))
)
;; Invoke the exported `roundtrip` with an ASCII character (U+0078), a
;; character from the Basic Multilingual Plane (U+26F3), and a character
;; above U+FFFF (U+1F370). Each call must return normally.
(assert_return (invoke "roundtrip" (char.const "x")) (unit.const))
(assert_return (invoke "roundtrip" (char.const "⛳")) (unit.const))
(assert_return (invoke "roundtrip" (char.const "🍰")) (unit.const))
;; invalid chars
;;
;; 0xd800 is the first value of the surrogate range. `$c2`'s start function
;; passes it to the lowered `char`-taking import, so instantiating the
;; component is expected to trap with a message matching "unreachable".
(assert_trap
(component
(component $c1
;; the callee ignores its argument; only the call itself matters
(core module $m (func (export "a") (param i32)))
(core instance $m (instantiate $m))
(func (export "a") (param char) (canon lift (core func $m "a")))
)
(component $c2
(import "" (instance $i (export "a" (func (param char)))))
(core func $a (canon lower (func $i "a")))
(core module $m
(import "" "a" (func $a (param i32)))
(func $start (call $a (i32.const 0xd800)))
(start $start)
)
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
)
(instance $c1 (instantiate $c1))
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
)
"unreachable")
;; 0xdfff is the last value of the surrogate range. `$c2`'s start function
;; passes it to the lowered `char`-taking import, so instantiating the
;; component is expected to trap with a message matching "unreachable".
(assert_trap
(component
(component $c1
;; the callee ignores its argument; only the call itself matters
(core module $m (func (export "a") (param i32)))
(core instance $m (instantiate $m))
(func (export "a") (param char) (canon lift (core func $m "a")))
)
(component $c2
(import "" (instance $i (export "a" (func (param char)))))
(core func $a (canon lower (func $i "a")))
(core module $m
(import "" "a" (func $a (param i32)))
(func $start (call $a (i32.const 0xdfff)))
(start $start)
)
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
)
(instance $c1 (instantiate $c1))
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
)
"unreachable")
;; 0x110000 is one past the largest Unicode scalar value (0x10ffff).
;; `$c2`'s start function passes it to the lowered `char`-taking import, so
;; instantiating the component is expected to trap with a message matching
;; "unreachable".
(assert_trap
(component
(component $c1
;; the callee ignores its argument; only the call itself matters
(core module $m (func (export "a") (param i32)))
(core instance $m (instantiate $m))
(func (export "a") (param char) (canon lift (core func $m "a")))
)
(component $c2
(import "" (instance $i (export "a" (func (param char)))))
(core func $a (canon lower (func $i "a")))
(core module $m
(import "" "a" (func $a (param i32)))
(func $start (call $a (i32.const 0x110000)))
(start $start)
)
(core instance (instantiate $m (with "" (instance (export "a" (func $a))))))
)
(instance $c1 (instantiate $c1))
(instance $c2 (instantiate $c2 (with "" (instance $c1))))
)
"unreachable")