diff --git a/crates/wiggle/generate/src/config.rs b/crates/wiggle/generate/src/config.rs index 0d4ada6402..5f5fcb1472 100644 --- a/crates/wiggle/generate/src/config.rs +++ b/crates/wiggle/generate/src/config.rs @@ -1,11 +1,15 @@ -use std::path::{Path, PathBuf}; - -use proc_macro2::Span; -use syn::{ - braced, bracketed, - parse::{Parse, ParseStream}, - punctuated::Punctuated, - Error, Ident, LitStr, Result, Token, +use { + proc_macro2::Span, + std::{ + iter::FromIterator, + path::{Path, PathBuf}, + }, + syn::{ + braced, bracketed, + parse::{Parse, ParseStream}, + punctuated::Punctuated, + Error, Ident, LitStr, Result, Token, + }, }; #[derive(Debug, Clone)] @@ -23,7 +27,8 @@ pub enum ConfigField { impl ConfigField { pub fn parse_pair(ident: &str, value: ParseStream, err_loc: Span) -> Result { match ident { - "witx" => Ok(ConfigField::Witx(value.parse()?)), + "witx" => Ok(ConfigField::Witx(WitxConf::Paths(value.parse()?))), + "witx_literal" => Ok(ConfigField::Witx(WitxConf::Literal(value.parse()?))), "ctx" => Ok(ConfigField::Ctx(value.parse()?)), _ => Err(Error::new(err_loc, "expected `witx` or `ctx`")), } @@ -61,6 +66,15 @@ impl Config { .ok_or_else(|| Error::new(err_loc, "`ctx` field required"))?, }) } + + /// Load the `witx` document for the configuration. + /// + /// # Panics + /// + /// This method will panic if the paths given in the `witx` field were not valid documents. + pub fn load_document(&self) -> witx::Document { + self.witx.load_document() + } } impl Parse for Config { @@ -73,31 +87,110 @@ impl Parse for Config { } } +/// The witx document(s) that will be loaded from a [`Config`](struct.Config.html). +/// +/// A witx interface definition can be provided either as a collection of relative paths to +/// documents, or as a single inlined string literal. Note that `(use ...)` directives are not +/// permitted when providing a string literal. 
#[derive(Debug, Clone)] -pub struct WitxConf { - pub paths: Vec, +pub enum WitxConf { + /// A collection of paths pointing to witx files. + Paths(Paths), + /// A single witx document, provided as a string literal. + Literal(Literal), } impl WitxConf { + /// Load the `witx` document. + /// + /// # Panics + /// + /// This method will panic if the paths given in the `witx` field were not valid documents, or + /// if any of the given documents were not syntactically valid. + pub fn load_document(&self) -> witx::Document { + match self { + Self::Paths(paths) => witx::load(paths.as_ref()).expect("loading witx"), + Self::Literal(doc) => witx::parse(doc.as_ref()).expect("parsing witx"), + } + } + + /// If using the [`Paths`][paths] syntax, make all paths relative to a root directory. + /// + /// [paths]: enum.WitxConf.html#variant.Paths pub fn make_paths_relative_to>(&mut self, root: P) { - self.paths.iter_mut().for_each(|p| { - if !p.is_absolute() { - *p = PathBuf::from(root.as_ref()).join(p.clone()); - } - }); + if let Self::Paths(paths) = self { + paths.as_mut().iter_mut().for_each(|p| { + if !p.is_absolute() { + *p = PathBuf::from(root.as_ref()).join(p.clone()); + } + }); + } } } -impl Parse for WitxConf { +/// A collection of paths, pointing to witx documents. +#[derive(Debug, Clone)] +pub struct Paths(Vec); + +impl Paths { + /// Create a new, empty collection of paths. 
+ pub fn new() -> Self { + Default::default() + } +} + +impl Default for Paths { + fn default() -> Self { + Self(Default::default()) + } +} + +impl AsRef<[PathBuf]> for Paths { + fn as_ref(&self) -> &[PathBuf] { + self.0.as_ref() + } +} + +impl AsMut<[PathBuf]> for Paths { + fn as_mut(&mut self) -> &mut [PathBuf] { + self.0.as_mut() + } +} + +impl FromIterator for Paths { + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self(iter.into_iter().collect()) + } +} + +impl Parse for Paths { fn parse(input: ParseStream) -> Result { let content; let _ = bracketed!(content in input); let path_lits: Punctuated = content.parse_terminated(Parse::parse)?; - let paths = path_lits + Ok(path_lits .iter() .map(|lit| PathBuf::from(lit.value())) - .collect(); - Ok(WitxConf { paths }) + .collect()) + } +} + +/// A single witx document, provided as a string literal. +#[derive(Debug, Clone)] +pub struct Literal(String); + +impl AsRef for Literal { + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +impl Parse for Literal { + fn parse(input: ParseStream) -> Result { + Ok(Self(input.parse::()?.value())) } } diff --git a/crates/wiggle/generate/src/names.rs b/crates/wiggle/generate/src/names.rs index 644b877d8c..a17897b1a6 100644 --- a/crates/wiggle/generate/src/names.rs +++ b/crates/wiggle/generate/src/names.rs @@ -1,10 +1,10 @@ -use heck::{CamelCase, ShoutySnakeCase, SnakeCase}; +use crate::lifetimes::LifetimeExt; +use escaping::{escape_id, handle_2big_enum_variant, NamingConvention}; +use heck::{ShoutySnakeCase, SnakeCase}; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; use witx::{AtomType, BuiltinType, Id, Type, TypeRef}; -use crate::lifetimes::LifetimeExt; - pub struct Names { ctx_type: Ident, runtime_mod: TokenStream, @@ -17,16 +17,20 @@ impl Names { runtime_mod, } } + pub fn ctx_type(&self) -> Ident { self.ctx_type.clone() } + pub fn runtime_mod(&self) -> TokenStream { self.runtime_mod.clone() } + pub fn type_(&self, id: &Id) -> 
TokenStream { - let ident = format_ident!("{}", id.as_str().to_camel_case()); + let ident = escape_id(id, NamingConvention::CamelCase); quote!(#ident) } + pub fn builtin_type(&self, b: BuiltinType, lifetime: TokenStream) -> TokenStream { match b { BuiltinType::String => { @@ -83,15 +87,12 @@ impl Names { } } + /// Convert an enum variant from its [`Id`][witx] name to its Rust [`Ident`][id] representation. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn enum_variant(&self, id: &Id) -> Ident { - // FIXME this is a hack - just a proof of concept. - if id.as_str().starts_with('2') { - format_ident!("TooBig") - } else if id.as_str() == "type" { - format_ident!("Type") - } else { - format_ident!("{}", id.as_str().to_camel_case()) - } + handle_2big_enum_variant(id).unwrap_or_else(|| escape_id(id, NamingConvention::CamelCase)) } pub fn flag_member(&self, id: &Id) -> Ident { @@ -102,34 +103,44 @@ impl Names { format_ident!("{}", id.as_str().to_shouty_snake_case()) } + /// Convert a struct member from its [`Id`][witx] name to its Rust [`Ident`][id] representation. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn struct_member(&self, id: &Id) -> Ident { - // FIXME this is a hack - just a proof of concept. - if id.as_str() == "type" { - format_ident!("type_") - } else { - format_ident!("{}", id.as_str().to_snake_case()) - } + escape_id(id, NamingConvention::SnakeCase) } + /// Convert a module name from its [`Id`][witx] name to its Rust [`Ident`][id] representation. 
+ /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn module(&self, id: &Id) -> Ident { - format_ident!("{}", id.as_str().to_snake_case()) + escape_id(id, NamingConvention::SnakeCase) } + /// Convert a trait name from its [`Id`][witx] name to its Rust [`Ident`][id] representation. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn trait_name(&self, id: &Id) -> Ident { - format_ident!("{}", id.as_str().to_camel_case()) + escape_id(id, NamingConvention::CamelCase) } + /// Convert a function name from its [`Id`][witx] name to its Rust [`Ident`][id] representation. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn func(&self, id: &Id) -> Ident { - format_ident!("{}", id.as_str().to_snake_case()) + escape_id(id, NamingConvention::SnakeCase) } + /// Convert a parameter name from its [`Id`][witx] name to its Rust [`Ident`][id] representation. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html pub fn func_param(&self, id: &Id) -> Ident { - // FIXME this is a hack - just a proof of concept. - if id.as_str() == "in" { - format_ident!("in_") - } else { - format_ident!("{}", id.as_str().to_snake_case()) - } + escape_id(id, NamingConvention::SnakeCase) } pub fn func_core_arg(&self, arg: &witx::CoreParamType) -> Ident { @@ -174,3 +185,113 @@ impl Names { } } } + +/// Identifier escaping utilities. +/// +/// This module most importantly exports an `escape_id` function that can be used to properly +/// escape tokens that conflict with strict and reserved keywords, as of Rust's 2018 edition. 
+/// +/// Weak keywords are not included as their semantic rules do not have the same implications as +/// those of strict and reserved keywords. `union` for example, is permitted as the name of a +/// variable. `dyn` was promoted to a strict keyword beginning in the 2018 edition. +mod escaping { + use { + heck::{CamelCase, SnakeCase}, + proc_macro2::Ident, + quote::format_ident, + witx::Id, + }; + + /// Identifier naming convention. + /// + /// Because shouty snake case values (identifiers that look `LIKE_THIS`) cannot potentially + /// conflict with any Rust keywords, this enum only includes snake and camel case variants. + pub enum NamingConvention { + /// Camel case. Used to denote values `LikeThis`. + CamelCase, + /// Snake case. Used to denote values `like_this`. + SnakeCase, + } + + /// Given a witx [`Id`][witx] and a [`NamingConvention`][naming], return an [`Ident`][id] word of + /// Rust syntax that accounts for escaping both strict and reserved keywords. If an identifier + /// would have conflicted with a keyword, a trailing underscore will be appended. + /// + /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html + /// [naming]: enum.NamingConvention.html + /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html + pub fn escape_id(id: &Id, conv: NamingConvention) -> Ident { + use NamingConvention::{CamelCase, SnakeCase}; + match (conv, id.as_str()) { + // For camel-cased identifiers, `Self` is the only potential keyword conflict. + (CamelCase, "self") => format_ident!("Self_"), + (CamelCase, s) => format_ident!("{}", s.to_camel_case()), + // Snake-cased identifiers are where the bulk of conflicts can occur. + (SnakeCase, s) => { + let s = s.to_snake_case(); + if STRICT.iter().chain(RESERVED).any(|k| *k == s) { + // If the snake-cased string matched any strict or reserved keywords, then + // append a trailing underscore to the identifier we generate. + format_ident!("{}_", s) + } else { + format_ident!("{}", s) // Otherwise, use the string as is. 
+ } + } + } + } + + /// Strict keywords. + /// + /// > Strict keywords cannot be used as the names of: + /// > * Items + /// > * Variables and function parameters + /// > * Fields and variants + /// > * Type parameters + /// > * Lifetime parameters or loop labels + /// > * Macros or attributes + /// > * Macro placeholders + /// > * Crates + /// > + /// > - [The Rust Reference][ref] + /// + /// This list also includes keywords that were introduced in the 2018 edition of Rust. + /// + /// [ref]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords + const STRICT: &[&str] = &[ + "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", + "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", + "mut", "pub", "ref", "return", "self", "Self", "static", "struct", "super", "trait", + "true", "type", "unsafe", "use", "where", "while", + ]; + + /// Reserved keywords. + /// + /// > These keywords aren't used yet, but they are reserved for future use. They have the same + /// > restrictions as strict keywords. The reasoning behind this is to make current programs + /// > forward compatible with future versions of Rust by forbidding them to use these keywords. + /// > + /// > - [The Rust Reference][ref] + /// + /// This list also includes keywords that were introduced in the 2018 edition of Rust. + /// + /// [ref]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords + const RESERVED: &[&str] = &[ + "abstract", "become", "box", "do", "final", "macro", "override", "priv", "try", "typeof", + "unsized", "virtual", "yield", + ]; + + /// Handle WASI's [`errno::2big`][err] variant. + /// + /// This is an unfortunate edge case that must be accounted for when generating `enum` variants. + /// This will only return `Some(_)` if the given witx identifier *is* `2big`, otherwise this + /// function will return `None`. 
+ /// + /// [err]: https://github.com/WebAssembly/WASI/blob/master/phases/snapshot/docs.md#-errno-enumu16 + pub fn handle_2big_enum_variant(id: &Id) -> Option { + if id.as_str() == "2big" { + Some(format_ident!("TooBig")) + } else { + None + } + } +} diff --git a/crates/wiggle/macro/src/lib.rs b/crates/wiggle/macro/src/lib.rs index 8fd6f680a7..7587a42451 100644 --- a/crates/wiggle/macro/src/lib.rs +++ b/crates/wiggle/macro/src/lib.rs @@ -95,7 +95,7 @@ pub fn from_witx(args: TokenStream) -> TokenStream { std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR env var"), ); - let doc = witx::load(&config.witx.paths).expect("loading witx"); + let doc = config.load_document(); let names = wiggle_generate::Names::new(&config.ctx.name, quote!(wiggle)); let code = wiggle_generate::generate(&doc, &names); diff --git a/crates/wiggle/tests/keywords.rs b/crates/wiggle/tests/keywords.rs new file mode 100644 index 0000000000..ea002fcc6d --- /dev/null +++ b/crates/wiggle/tests/keywords.rs @@ -0,0 +1,64 @@ +//! Tests to check that keywords in `witx` files are escaped. +//! +//! No `#[test]` functions are defined below because the `wiggle::from_witx!` macro expanding into +//! syntactically correct Rust code at compile time is the subject under test. + +/// Test that an enum variant that conflicts with a Rust keyword can be compiled properly. +mod enum_test { + wiggle::from_witx!({ + witx_literal: + "(typename $self + (enum u8 + $self + $2big + ) + )", + ctx: DummyCtx, + }); +} + +/// Test module, trait, function, and function parameter names conflicting with Rust keywords. +/// +/// We use `self` because the camel-cased trait name `Self` is *also* a strict keyword. This lets +/// us simultaneously test the name of the module and the generated trait. 
+mod module_trait_fn_and_arg_test { + use wiggle_test::WasiCtx; + wiggle::from_witx!({ + witx_literal: + "(module $self + (@interface func (export \"fn\") + (param $use u32) + (param $virtual u32) + ) + )", + ctx: WasiCtx, + }); + impl<'a> self_::Self_ for WasiCtx<'a> { + #[allow(unused_variables)] + fn fn_(&self, use_: u32, virtual_: u32) -> Result<(), ()> { + unimplemented!(); + } + } +} + +/// Test that a struct and member names conflicting with Rust keywords can be compiled properly. +mod struct_test { + wiggle::from_witx!({ + witx_literal: + "(typename $self + (struct + (field $become s32) + (field $mut s32) + ) + )", + ctx: DummyCtx, + }); +} + +/// Test that a union variant that conflicts with a Rust keyword can be compiled properly. +mod union_test { + wiggle::from_witx!({ + witx: ["tests/keywords_union.witx"], + ctx: DummyCtx, + }); +} diff --git a/crates/wiggle/tests/keywords_union.witx b/crates/wiggle/tests/keywords_union.witx new file mode 100644 index 0000000000..14d29b70dd --- /dev/null +++ b/crates/wiggle/tests/keywords_union.witx @@ -0,0 +1,15 @@ +(typename $union + (enum u8 + $self + $power + ) +) + +(typename $self + (union $union + ;; A union variant that will expand to a strict keyword `Self`. + (field $self (@witx pointer f32)) + ;; Oh it's true, that there's power in a union! + (field $power (@witx pointer f32)) + ) +)