diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 82373c0c2..f93257231 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,6 +45,9 @@ jobs: - name: Install Vulkan loader run: sudo apt-get install libvulkan-dev - uses: actions/checkout@v6 + - name: Checkout submodule + # Manually update submodules with --checkout because they are configured with update=none and will be skipped otherwise + run: git submodule update --recursive --init --force --checkout - name: Test all targets run: cargo test --workspace --all-targets - name: Test docs diff --git a/.gitmodules b/.gitmodules index f59e7f906..c3d06001c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,6 @@ path = generator/Vulkan-Headers url = https://github.com/KhronosGroup/Vulkan-Headers update = none +[submodule "generator-rewrite/Vulkan-Headers"] + path = generator-rewrite/Vulkan-Headers + url = https://github.com/KhronosGroup/Vulkan-Headers.git diff --git a/analysis/Cargo.toml b/analysis/Cargo.toml index 64d058be9..624ecf310 100644 --- a/analysis/Cargo.toml +++ b/analysis/Cargo.toml @@ -4,3 +4,8 @@ version = "2.0.0" edition = "2021" [dependencies] +roxmltree = "0.21" +tracing = "0.1" + +[dev-dependencies] +tracing-test = "0.2" diff --git a/analysis/src/cdecl.rs b/analysis/src/cdecl.rs new file mode 100644 index 000000000..62612926a --- /dev/null +++ b/analysis/src/cdecl.rs @@ -0,0 +1,393 @@ +use std::num::NonZeroU8; + +/// Identifier-category-aware minimal tokenization of a subset of C syntax, +/// sufficient for parsing the C declarations used in `vk.xml`. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum CTok<'a> { + /// Identifier referring to a type declaration in scope. + TypeName(&'a str), + + /// Identifier referring to a value declaration in scope. + ValueName(&'a str), + + /// Identifier that is being presently declared (exactly one per `CDecl`). + DeclName(&'a str), + + /// Supported keyword (one of [`CTok::SUPPORTED_KEYWORDS`]). + Kw(&'static str), + + /// Any ASCII punctuation (i.e. as determined by [`char::is_ascii_punctuation`]). + // FIXME(eddyb) this could really use the `std::ascii` API. + Punct(char), + + /// Integer literal (for e.g. array lengths). + IntLit(&'a str), + + /// Unknown identifier (all known cases are spec bugs or deficiencies). + StrayIdent(&'a str), +} + +#[derive(Debug)] +pub struct UnsupportedCTok<'a>(#[allow(dead_code)] &'a str); + +impl<'a> CTok<'a> { + pub const SUPPORTED_KEYWORDS: &'static [&'static str] = &["const", "struct", "typedef", "void"]; + + pub(crate) fn lex_into( + s: &'a str, + out: &mut impl Extend>, + ) -> Result<(), UnsupportedCTok<'a>> { + // FIXME(eddyb) this could really use the `std::ascii` API. + let mut s = s; + while let Some(c) = s.chars().next() { + if !c.is_ascii() { + return Err(UnsupportedCTok(s)); + } + + let is_ident_or_number = |c: char| c.is_ascii_alphanumeric() || c == '_'; + let tok = if is_ident_or_number(c) { + let len = s.chars().take_while(|&c| is_ident_or_number(c)).count(); + let (tok, rest) = s.split_at(len); + s = rest; + if c.is_ascii_digit() { + CTok::IntLit(tok) + } else if let Some(kw) = CTok::SUPPORTED_KEYWORDS.iter().find(|&&kw| kw == tok) { + CTok::Kw(kw) + } else { + CTok::StrayIdent(tok) + } + } else if c.is_ascii_punctuation() { + s = &s[1..]; + CTok::Punct(c) + } else if c.is_ascii_whitespace() { + s = s.trim_start(); + continue; + } else { + return Err(UnsupportedCTok(s)); + }; + out.extend([tok]); + } + Ok(()) + } +} + +#[derive(Debug, PartialEq, Eq)] +pub struct CDecl<'a> { + pub ty: CType<'a>, + pub name: &'a str, + pub bitfield_width: Option, +} + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum CDeclMode { + TypeDef, + StructMember, + FuncParam, + FuncTypeParam, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum CType<'a> { + Base(CBaseType<'a>), + Ptr { + implicit_for_decay: bool, + is_const: bool, + pointee: Box>, + }, + Array { + element: Box>, + len: CArrayLen<'a>, + }, + Func { + ret_ty: Option>>, + params: Vec>, + }, +} + +impl CType<'_> { + pub const VOID: CType<'static> = CType::Base(CBaseType { + struct_tag: false, + name: "void", + }); +} + +#[derive(Debug, PartialEq, Eq)] +pub struct CBaseType<'a> { + pub struct_tag: bool, + pub name: &'a str, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum CArrayLen<'a> { + Named(&'a str), + Literal(u128), +} + +#[derive(Debug)] +pub struct CDeclParseError<'a, 'b> { + pub kind: CDeclParseErrorKind<'a>, + pub tokens: &'b [CTok<'a>], +} + +#[derive(Debug)] +pub enum CDeclParseErrorKind<'a> { + Missing(&'static str), + Multiple(&'static str), + Unused(&'static str), + InvalidIntLit(std::num::ParseIntError), + UnsupportedLeftmostToken(CTok<'a>), + UnsupportedRightmostToken(CTok<'a>), + UnbalancedBrackets, + UnsupportedArrayLength, +} + +impl<'a> CDecl<'a> { + // HACK(eddyb) this split is literally just to simplify error tracking. + pub(crate) fn parse<'b>( + mode: CDeclMode, + tokens: &'b [CTok<'a>], + ) -> Result, CDeclParseError<'a, 'b>> { + CDecl::parse_inner(mode, tokens).map_err(|kind| CDeclParseError { kind, tokens }) + } + fn parse_inner<'b>( + mode: CDeclMode, + tokens: &'b [CTok<'a>], + ) -> Result, CDeclParseErrorKind<'a>> { + use CDeclParseErrorKind as ErrorKind; + + trait InsertIfNone { + fn insert_if_none(&mut self, value: T) -> Option<&mut T>; + } + impl InsertIfNone for Option { + fn insert_if_none(&mut self, value: T) -> Option<&mut T> { + self.is_none().then(|| self.insert(value)) + } + } + + let (mut left, decl_name, mut right) = { + let mut decl_names = + tokens + .iter() + .copied() + .enumerate() + .filter_map(|(i, tok)| match tok { + CTok::DeclName(name) => Some((i, name)), + + // HACK(eddyb) this is only allowed due to the (few) + // function pointer typedefs in `vk.xml`, which don't + // label parameter names in any special way. + CTok::StrayIdent(name) if mode == CDeclMode::FuncTypeParam => { + Some((i, name)) + } + + _ => None, + }); + match (decl_names.next(), decl_names.next()) { + (Some((i, name)), None) => (&tokens[..i], name, &tokens[i + 1..]), + (None, _) => return Err(ErrorKind::Missing("DeclName")), + (Some(_), Some(_)) => return Err(ErrorKind::Multiple("DeclName")), + } + }; + + if mode == CDeclMode::TypeDef { + // NOTE(eddyb) `typedef` can appear later on as well, so this is + // unnecessarily strict, but it avoids much more involved tracking. + left = left + .strip_prefix(&[CTok::Kw("typedef")]) + .ok_or(ErrorKind::Missing("typedef"))?; + right = right + .strip_suffix(&[CTok::Punct(';')]) + .ok_or(ErrorKind::Missing(";"))?; + } + + let bitfield_width = match right { + [rest @ .., CTok::Punct(':'), CTok::IntLit(width_lit)] + if mode == CDeclMode::StructMember => + { + right = rest; + Some(width_lit.parse().map_err(ErrorKind::InvalidIntLit)?) + } + _ => None, + }; + + // FIXME(eddyb) deduplicate qualifier parsing somehow. + let mut const_qualif = match left { + [CTok::Kw("const"), rest @ ..] => { + left = rest; + Some(()) + } + _ => None, + }; + + let mut ty = CType::Base(match left { + [CTok::Kw("struct"), CTok::TypeName(name), rest @ ..] => { + left = rest; + CBaseType { + struct_tag: true, + name, + } + } + [CTok::TypeName(name) | CTok::Kw(name @ "void"), rest @ ..] => { + left = rest; + CBaseType { + struct_tag: false, + name, + } + } + _ => return Err(ErrorKind::Missing("TypeName")), + }); + + // This is the core of the C declaration parsing strategy: we have some + // type `T` (held in the variable `ty`) and tokens to either side of the + // name being declared, and at every step of the loops below there is a + // "closest binding" (postfix) "type operator", which we pattern-match + // from its side and then apply to `T`, replacing `T` with any of: + // - `T*` pointers (like Rust `*T`), from `T* ...` + // (only `left` side "type operator", and it takes precedence, making + // array-of-pointers much easier to spell out than pointer-to-array) + // - `T[N]` arrays (like Rust `[T; N]`), from `T ...[N]` + // - `T(A, B, C)` functions, from `T ...(A, B, C)` + // (Rust only has pointers to such types, `fn(A, B, C) -> T`) + // + // Notably, both sides are consumed outside-in (`left` LTR, `right` RTL), + // converging on the middle (where the name being declared is), and that + // can get confusing (an older comment below also tried to explain it). + // + // Once we run out of "type operators", and the declaration isn't trivial, + // only syntax left is parenthesization *around* the name being declared, + // with everything inside the parentheses applying *on top of* everything + // outside: but we've consumed everything outside so we're actually left + // with `T (...)` and we can simply drop the parentheses! + while !left.is_empty() || !right.is_empty() { + while let Some((&leftmost, after_leftmost)) = left.split_first() { + match leftmost { + CTok::Kw("const") => { + const_qualif + .insert_if_none(()) + .ok_or(ErrorKind::Multiple("const"))?; + } + CTok::Punct('*') => { + ty = CType::Ptr { + implicit_for_decay: false, + is_const: const_qualif.take().is_some(), + pointee: Box::new(ty), + }; + } + + // Outermost parentheses around the name being declared, + // handled together after both `left` and `right` loops. + CTok::Punct('(') => break, + + _ => return Err(ErrorKind::UnsupportedLeftmostToken(leftmost)), + } + left = after_leftmost; + } + 'right: while let Some(&rightmost) = right.last() { + // NOTE(eddyb) outermost (i.e. rightmost) suffixes apply first, + // and the only way this is "intuitive" is that e.g. a 2D array + // like `T m[A][B]` means `typeof(m[i][j]) = T`, and the lvalue + // syntax has to match the declaration (so `i < A` and `j < B`), + // IOW it's equivalent to `(T[B]) m[A]` / `typeof((m[i])[j]) = T` + // (if C had type parenthesization, or via C++ type aliases). + match rightmost { + CTok::Punct(']' | ')') => {} + + _ => return Err(ErrorKind::UnsupportedRightmostToken(rightmost)), + } + + // As `rightmost` is `]`/`)`, the matching `[`/`(` must be found. + let (before_rightmost_group, rightmost_group) = { + let mut i = right.len() - 1; + let mut nesting = 0; + loop { + let checked_dec = + |x: usize| x.checked_sub(1).ok_or(ErrorKind::UnbalancedBrackets); + match right[i] { + CTok::Punct(']' | ')') => nesting += 1, + CTok::Punct('[' | '(') => nesting = checked_dec(nesting)?, + _ => {} + } + if nesting == 0 { + break; + } + + // Outermost parentheses around the name being declared, + // handled together after both `left` and `right` loops. + if i == 0 && rightmost == CTok::Punct(')') { + break 'right; + } + + i = checked_dec(i)?; + } + right.split_at(i) + }; + + match rightmost_group { + [CTok::Punct('['), len @ .., CTok::Punct(']')] => { + ty = CType::Array { + element: Box::new(ty), + len: match len { + [CTok::ValueName(name)] => CArrayLen::Named(name), + [CTok::IntLit(lit)] => CArrayLen::Literal( + lit.parse().map_err(ErrorKind::InvalidIntLit)?, + ), + _ => return Err(ErrorKind::UnsupportedArrayLength), + }, + }; + } + [CTok::Punct('('), params @ .., CTok::Punct(')')] => { + if const_qualif.is_some() { + return Err(ErrorKind::Unused("const")); + } + + let params = match params { + [] => return Err(ErrorKind::Missing("parameters")), + [CTok::Kw("void")] => vec![], + _ => params + .split(|&tok| tok == CTok::Punct(',')) + .map(|param| CDecl::parse_inner(CDeclMode::FuncTypeParam, param)) + .collect::>()?, + }; + ty = CType::Func { + ret_ty: Some(ty).filter(|ty| *ty != CType::VOID).map(Box::new), + params, + }; + } + _ => return Err(ErrorKind::UnbalancedBrackets), + } + right = before_rightmost_group; + } + + // Outermost parentheses around the name being declared, handled here + // to ensure there is nothing else left around them, and can therefore + // be cleanly removed. + if let ([CTok::Punct('('), left_inner @ ..], [right_inner @ .., CTok::Punct(')')]) = + (left, right) + { + left = left_inner; + right = right_inner; + } + } + + // NOTE(eddyb) parameters to functions decay "into" pointers, but because + // we control the typesystem, we can keep both the array types, and the + // implicit pointer, closer to Rust e.g. `&[T; N]` arguments. + if let (CDeclMode::FuncParam, CType::Array { .. }) = (mode, &ty) { + ty = CType::Ptr { + implicit_for_decay: true, + is_const: const_qualif.take().is_some(), + pointee: Box::new(ty), + }; + } + + if const_qualif.is_some() { + return Err(ErrorKind::Unused("const")); + } + + Ok(CDecl { + ty, + name: decl_name, + bitfield_width, + }) + } +} diff --git a/analysis/src/lib.rs b/analysis/src/lib.rs index 88cb85962..042c82b2f 100644 --- a/analysis/src/lib.rs +++ b/analysis/src/lib.rs @@ -1,9 +1,53 @@ -use std::path::Path; +pub mod cdecl; +pub mod xml; -pub struct Analysis {} +use std::{fs, path::Path}; +use tracing::{debug, error_span}; + +/// Holds the analysis results for easy querying. +#[derive(Debug)] +pub struct Analysis { + vk: Library, + video: Library, +} impl Analysis { - pub fn new(_vulkan_headers_path: impl AsRef) -> Analysis { - Analysis {} + /// Analyse the provided copy of the + /// [Vulkan-Headers](https://github.com/KhronosGroup/Vulkan-Headers) repo. + pub fn new(vulkan_headers_path: impl AsRef) -> Analysis { + let vulkan_headers_path = vulkan_headers_path.as_ref(); + Analysis { + vk: Library::new(vulkan_headers_path.join("registry/vk.xml")), + video: Library::new(vulkan_headers_path.join("registry/video.xml")), + } + } + + /// Get "raw" Vulkan XML registry. + pub fn vk_xml(&self) -> &xml::Registry { + &self.vk.xml + } + + /// Get "raw" Vulkan Video XML registry. + pub fn video_xml(&self) -> &xml::Registry { + &self.video.xml + } +} + +#[derive(Debug)] +struct Library { + xml: xml::Registry, +} + +impl Library { + fn new(xml_path: impl AsRef) -> Library { + let xml = error_span!("xml", path = %xml_path.as_ref().display()).in_scope(|| { + debug!("reading xml"); + // We leak the input string here for convenience, to avoid explicit lifetimes. + let xml_input = Box::leak(fs::read_to_string(xml_path).unwrap().into_boxed_str()); + debug!("parsing xml"); + xml::Registry::parse(xml_input, "vulkan") + }); + + Library { xml } } } diff --git a/analysis/src/xml.rs b/analysis/src/xml.rs new file mode 100644 index 000000000..ae98bcc90 --- /dev/null +++ b/analysis/src/xml.rs @@ -0,0 +1,850 @@ +use crate::cdecl::{CDecl, CDeclMode, CTok, CType}; +use roxmltree::NodeType; +use roxmltree::StringStorage; +use std::fmt::Write; +use tracing::{info_span, trace}; + +/// A node with its `'input` lifetime set to `'static`. +type Node<'a> = roxmltree::Node<'a, 'static>; + +/// Converts `roxmltree`'s `StringStorage` to a `&'static str`. +/// +/// In nearly all cases this function will give you a slice from the original XML input, +/// but this is not always possible, for example when `"` gets replaced with normal quotes. +/// This does not happen often, so we are leaking that memory for convenience. +fn leak(string_storage: StringStorage<'static>) -> &'static str { + match string_storage { + StringStorage::Borrowed(s) => s, + StringStorage::Owned(s) => String::leak((*s).into()), + } +} + +/// Retrieves the value of the `node`'s attribute named `name`. +fn attribute(node: Node, name: &str) -> Option<&'static str> { + node.attribute_node(name) + .map(|attr| leak(attr.value_storage().clone())) +} + +/// Retrieves the ','-separated values of the `node`'s attribute named `name`. +fn attribute_comma_separated(node: Node, name: &str) -> Vec<&'static str> { + attribute(node, name) + .map(|value| value.split(',').collect()) + .unwrap_or_default() +} + +/// Retrieves the text inside the next child element of `node` named `name`. +fn child_text(node: Node, name: &str) -> Option<&'static str> { + let child = node.children().find(|node| node.has_tag_name(name)); + child.map(|node| leak(node.text_storage().unwrap().clone())) +} + +/// Returns [`true`] when the `node`'s "api" attribute matches the `expected` API. +fn api_matches(node: &Node, expected: &str) -> bool { + node.attribute("api") + .map(|values| values.split(',').any(|value| value == expected)) + .unwrap_or(true) +} + +/// Returns a "pseudo-XML" representation of the node, for use in tracing spans. +fn node_span_field(node: &Node) -> String { + let mut output = format!("<{:?}", node.tag_name()); + for attr in node.attributes() { + write!(output, " {}='{}'", attr.name(), attr.value()).unwrap(); + } + + output + ">" +} + +impl CDecl<'static> { + fn from_xml(mode: CDeclMode, children: roxmltree::Children<'_, 'static>) -> CDecl<'static> { + let mut c_tokens = vec![]; + for child in children { + let text = || leak(child.text_storage().unwrap().clone()); + match child.node_type() { + NodeType::Text => { + CTok::lex_into(text(), &mut c_tokens).unwrap(); + } + NodeType::Element => { + assert_eq!(child.attributes().len(), 0); + let text = || { + assert_eq!(child.children().count(), 1); + text() + }; + c_tokens.push(match child.tag_name().name() { + "comment" => continue, + "type" => CTok::TypeName(text()), + "enum" => CTok::ValueName(text()), + "name" => CTok::DeclName(text()), + tag => unreachable!("unexpected `<{tag}>` in C declaration"), + }) + } + NodeType::Root | NodeType::PI | NodeType::Comment => unreachable!(), + } + } + + c_tokens.retain_mut(|tok| { + if let CTok::StrayIdent(name) = tok { + match &name[..] { + // HACK(eddyb) work around `video.xml` spec bug (missing ``). + "STD_VIDEO_H264_MAX_NUM_LIST_REF" | "STD_VIDEO_H265_MAX_NUM_LIST_REF" => { + *tok = CTok::ValueName(name); + } + + // HACK(eddyb) work around `vk.xml` spec bug (missing ``). + "VkBool32" | "PFN_vkVoidFunction" => { + *tok = CTok::TypeName(name); + } + + _ => {} + } + } + + match tok { + // HACK(eddyb) ideally we'd expand this to something using the + // C++11/C23 `[[...]]` attribute syntax, but that'd need support + // in `cdecl`, and it's redundant since all function pointers + // equally get it, so we can just remove it here. + CTok::StrayIdent("VKAPI_PTR") => false, + + _ => true, + } + }); + + CDecl::parse(mode, &c_tokens).unwrap() + } +} + +/// Raw representation of Vulkan XML files (`vk.xml`, `video.xml`). +#[derive(Debug, Default)] +pub struct Registry { + pub externals: Vec, + pub basetypes: Vec, + pub bitmask_types: Vec, + pub bitmask_aliases: Vec, + pub handles: Vec, + pub handle_aliases: Vec, + pub enum_types: Vec, + pub enum_aliases: Vec, + pub funcpointers: Vec, + pub structs: Vec, + pub struct_aliases: Vec, + pub unions: Vec, + pub constants: Vec, + pub constant_aliases: Vec, + pub enums: Vec, + pub bitmasks: Vec, + pub commands: Vec, + pub command_aliases: Vec, + pub features: Vec, + pub extensions: Vec, +} + +impl Registry { + pub fn parse(input: &'static str, api: &str) -> Registry { + let doc = roxmltree::Document::parse(input).unwrap(); + Registry::from_node(doc.root_element(), api) + } + + fn from_node(registry_node: Node, api: &str) -> Registry { + let mut registry = Registry::default(); + for registry_child in registry_node + .children() + .filter(|node| api_matches(node, api)) + { + match registry_child.tag_name().name() { + "types" => { + for type_node in registry_child + .children() + .filter(|node| node.has_tag_name("type")) + .filter(|node| api_matches(node, api)) + { + let _s = info_span!("type", node = node_span_field(&type_node)).entered(); + trace!("encountered node"); + if type_node.has_attribute("alias") { + match type_node.attribute("category") { + Some("bitmask") => { + registry.bitmask_aliases.push(Alias::from_node(type_node)); + } + Some("handle") => { + registry.handle_aliases.push(Alias::from_node(type_node)); + } + Some("enum") => { + registry.enum_aliases.push(Alias::from_node(type_node)); + } + Some("struct") => { + registry.struct_aliases.push(Alias::from_node(type_node)); + } + _ => trace!("ignored"), + } + } else { + match type_node.attribute("category") { + Some("basetype") => { + registry.basetypes.push(BaseType::from_node(type_node)) + } + Some("bitmask") => registry + .bitmask_types + .push(BitMaskType::from_node(type_node)), + Some("handle") => { + registry.handles.push(Handle::from_node(type_node)) + } + Some("enum") => { + registry.enum_types.push(EnumType::from_node(type_node)) + } + Some("funcpointer") => registry + .funcpointers + .push(FuncPointer::from_node(type_node)), + Some("struct") => { + registry.structs.push(Structure::from_node(type_node, api)) + } + Some("union") => { + registry.unions.push(Structure::from_node(type_node, api)); + } + Some(_) => trace!("ignored"), + None => { + registry.externals.push(External::from_node(type_node)); + } + } + } + } + } + "enums" => { + let _s = info_span!("enum", node = node_span_field(®istry_child)).entered(); + trace!("encountered node"); + match registry_child.attribute("type") { + Some("enum") => registry.enums.push(Enum::from_node(registry_child, api)), + Some("bitmask") => registry + .bitmasks + .push(BitMask::from_node(registry_child, api)), + None if registry_child.attribute("name") == Some("API Constants") => { + for enum_node in registry_child + .children() + .filter(|node| node.has_tag_name("enum")) + .filter(|node| api_matches(node, api)) + { + if enum_node.has_attribute("alias") { + registry.constant_aliases.push(Alias::from_node(enum_node)); + } else { + registry.constants.push(Constant::from_node(enum_node)); + } + } + } + _ => trace!("ignored"), + } + } + "commands" => { + for command_node in registry_child + .children() + .filter(|node| node.has_tag_name("command")) + .filter(|node| api_matches(node, api)) + { + let _s = + info_span!("command", node = node_span_field(&command_node)).entered(); + trace!("encountered node"); + if command_node.has_attribute("alias") { + registry + .command_aliases + .push(Alias::from_node(command_node)); + } else { + registry + .commands + .push(Command::from_node(command_node, api)); + } + } + } + "feature" => { + let _s = + info_span!("feature", node = node_span_field(®istry_child)).entered(); + trace!("encountered node"); + registry + .features + .push(Feature::from_node(registry_child, api)); + } + "extensions" => { + for extension_node in registry_child + .children() + .filter(|node| node.has_tag_name("extension")) + .filter(|node| { + node.attribute("supported") + .map(|values| values.split(',').any(|support| support == api)) + .unwrap_or(true) + }) + { + let _s = info_span!("extension", node = node_span_field(&extension_node)) + .entered(); + trace!("encountered node"); + registry + .extensions + .push(Extension::from_node(extension_node, api)); + } + } + _ => (), + } + } + + registry + } +} + +#[derive(Debug)] +pub struct Alias { + pub name: &'static str, + pub alias: &'static str, +} + +impl Alias { + fn from_node(node: Node) -> Alias { + Alias { + name: attribute(node, "name").unwrap(), + alias: attribute(node, "alias").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct External { + pub name: &'static str, + pub requires: Option<&'static str>, +} + +impl External { + fn from_node(node: Node) -> External { + External { + name: attribute(node, "name").unwrap(), + requires: attribute(node, "requires"), + } + } +} + +#[derive(Debug)] +pub struct BaseType { + pub name: &'static str, + /// [`None`] indicates this being a platform-specific type. + pub ty: Option<&'static str>, +} + +impl BaseType { + fn from_node(node: Node) -> BaseType { + BaseType { + name: child_text(node, "name").unwrap(), + ty: child_text(node, "type"), + } + } +} + +#[derive(Debug)] +pub struct BitMaskType { + pub requires: Option<&'static str>, + pub bitvalues: Option<&'static str>, + pub ty: &'static str, + pub name: &'static str, +} + +impl BitMaskType { + fn from_node(node: Node) -> BitMaskType { + BitMaskType { + requires: attribute(node, "requires"), + bitvalues: attribute(node, "bitvalues"), + ty: child_text(node, "type").unwrap(), + name: child_text(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct Handle { + pub parent: Option<&'static str>, + pub objtypeenum: &'static str, + pub ty: &'static str, + pub name: &'static str, +} + +impl Handle { + fn from_node(node: Node) -> Handle { + Handle { + parent: attribute(node, "parent"), + objtypeenum: attribute(node, "objtypeenum").unwrap(), + ty: child_text(node, "type").unwrap(), + name: child_text(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct EnumType { + pub name: &'static str, +} + +impl EnumType { + fn from_node(node: Node) -> EnumType { + EnumType { + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct FuncPointer { + pub c_decl: CDecl<'static>, + pub requires: Option<&'static str>, +} + +impl FuncPointer { + fn from_node(node: Node) -> FuncPointer { + FuncPointer { + c_decl: CDecl::from_xml(CDeclMode::TypeDef, node.children()), + requires: attribute(node, "requires"), + } + } +} + +#[derive(Debug)] +pub struct StructureMember { + pub c_decl: CDecl<'static>, + pub values: Option<&'static str>, + pub len: Vec<&'static str>, + pub altlen: Option<&'static str>, + pub optional: Vec<&'static str>, +} + +impl StructureMember { + fn from_node(node: Node) -> StructureMember { + StructureMember { + c_decl: CDecl::from_xml(CDeclMode::StructMember, node.children()), + values: attribute(node, "values"), + len: attribute_comma_separated(node, "len"), + altlen: attribute(node, "altlen"), + optional: attribute_comma_separated(node, "optional"), + } + } +} + +#[derive(Debug)] +pub struct Structure { + pub name: &'static str, + pub structextends: Vec<&'static str>, + pub members: Vec, +} + +impl Structure { + fn from_node(node: Node, api: &str) -> Structure { + Structure { + name: attribute(node, "name").unwrap(), + structextends: attribute_comma_separated(node, "structextends"), + members: node + .children() + .filter(|node| node.has_tag_name("member")) + .filter(|node| api_matches(node, api)) + .map(StructureMember::from_node) + .collect(), + } + } +} + +#[derive(Debug)] +pub struct Constant { + pub ty: &'static str, + pub value: &'static str, + pub name: &'static str, +} + +impl Constant { + fn from_node(node: Node) -> Constant { + Constant { + ty: attribute(node, "type").unwrap(), + value: attribute(node, "value").unwrap(), + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct EnumValue { + pub value: &'static str, + pub name: &'static str, +} + +impl EnumValue { + fn from_node(node: Node) -> EnumValue { + EnumValue { + value: attribute(node, "value").unwrap(), + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct Enum { + pub name: &'static str, + pub values: Vec, + pub aliases: Vec, +} + +impl Enum { + fn from_node(node: Node, api: &str) -> Enum { + let mut value = Enum { + name: attribute(node, "name").unwrap(), + values: Vec::new(), + aliases: Vec::new(), + }; + + for variant in node + .children() + .filter(|node| node.has_tag_name("enum")) + .filter(|node| api_matches(node, api)) + { + if variant.has_attribute("alias") { + value.aliases.push(Alias::from_node(variant)); + } else { + value.values.push(EnumValue::from_node(variant)); + } + } + + value + } +} + +#[derive(Debug)] +pub struct BitMaskBit { + pub bitpos: &'static str, + pub name: &'static str, +} + +impl BitMaskBit { + fn from_node(node: Node) -> BitMaskBit { + BitMaskBit { + bitpos: attribute(node, "bitpos").unwrap(), + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct BitMask { + pub name: &'static str, + pub bits: Vec, + /// Some bitmask variants represent literal values instead of specific + /// individual bits, e.g. a combination of bits, or no bits at all. A good + /// example for this is `VkCullModeFlagBits::FRONT_AND_BACK`. + pub values: Vec, + pub aliases: Vec, +} + +impl BitMask { + fn from_node(node: Node, api: &str) -> BitMask { + let mut value = BitMask { + name: attribute(node, "name").unwrap(), + bits: Vec::new(), + values: Vec::new(), + aliases: Vec::new(), + }; + + for variant in node + .children() + .filter(|node| node.has_tag_name("enum")) + .filter(|node| api_matches(node, api)) + { + if variant.has_attribute("alias") { + value.aliases.push(Alias::from_node(variant)); + } else if variant.has_attribute("value") { + value.values.push(EnumValue::from_node(variant)); + } else { + value.bits.push(BitMaskBit::from_node(variant)); + } + } + + value + } +} + +#[derive(Debug)] +pub struct CommandParam { + pub c_decl: CDecl<'static>, + pub len: Option<&'static str>, + pub altlen: Option<&'static str>, + pub optional: Vec<&'static str>, +} + +impl CommandParam { + fn from_node(node: Node) -> CommandParam { + CommandParam { + c_decl: CDecl::from_xml(CDeclMode::FuncParam, node.children()), + len: attribute(node, "len"), + altlen: attribute(node, "altlen"), + optional: attribute_comma_separated(node, "optional"), + } + } +} + +#[derive(Debug)] +pub struct Command { + pub return_type: Option>, + pub name: &'static str, + pub params: Vec, +} + +impl Command { + fn from_node(node: Node, api: &str) -> Command { + let proto = node + .children() + .find(|child| child.has_tag_name("proto")) + .filter(|node| api_matches(node, api)) + .unwrap(); + // FIXME(eddyb) `CDeclMode::StructMember` should work but isn't accurate. + let proto_cdecl = CDecl::from_xml(CDeclMode::StructMember, proto.children()); + Command { + return_type: Some(proto_cdecl.ty).filter(|ty| *ty != CType::VOID), + name: proto_cdecl.name, + params: node + .children() + .filter(|child| child.has_tag_name("param")) + .filter(|node| api_matches(node, api)) + .map(CommandParam::from_node) + .collect(), + } + } +} + +#[derive(Debug)] +pub struct RequireConstant { + pub name: &'static str, + /// `Some` indicates a new constant being defined here. + pub value: Option<&'static str>, +} + +impl RequireConstant { + fn from_node(node: Node) -> RequireConstant { + RequireConstant { + name: attribute(node, "name").unwrap(), + value: attribute(node, "value"), + } + } +} + +#[derive(Debug)] +pub struct RequireEnumVariant { + pub name: &'static str, + pub offset: u8, + pub extends: &'static str, +} + +impl RequireEnumVariant { + fn from_node(node: Node) -> RequireEnumVariant { + RequireEnumVariant { + name: attribute(node, "name").unwrap(), + offset: attribute(node, "offset").unwrap().parse().unwrap(), + extends: attribute(node, "extends").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct RequireBitPos { + pub name: &'static str, + pub bitpos: u8, + pub extends: &'static str, +} + +impl RequireBitPos { + fn from_node(node: Node) -> RequireBitPos { + RequireBitPos { + name: attribute(node, "name").unwrap(), + bitpos: attribute(node, "bitpos").unwrap().parse().unwrap(), + extends: attribute(node, "extends").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct RequireType { + pub name: &'static str, +} + +impl RequireType { + fn from_node(node: Node) -> RequireType { + RequireType { + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct RequireCommand { + pub name: &'static str, +} + +impl RequireCommand { + fn from_node(node: Node) -> RequireCommand { + RequireCommand { + name: attribute(node, "name").unwrap(), + } + } +} + +#[derive(Debug)] +pub struct Version { + pub api: &'static str, + pub major: u32, + pub minor: u32, +} + +impl Version { + fn from_str(s: &'static str) -> Option { + let (api, major_minor) = s.split_once("_VERSION_")?; + + let mut iter = major_minor.split('_').flat_map(str::parse); + let (Some(major), Some(minor), None) = (iter.next(), iter.next(), iter.next()) else { + return None; + }; + + Some(Version { api, major, minor }) + } +} + +#[derive(Debug)] +pub enum Depends { + Version(Version), + Feature { + feature_structure: &'static str, + feature_member: &'static str, + }, + Extension(&'static str), +} + +impl Depends { + fn from_str(s: &'static str) -> Depends { + if let Some(version) = Version::from_str(s) { + Depends::Version(version) + } else if let Some((feature_structure, feature_member)) = s.split_once("::") { + Depends::Feature { + feature_structure, + feature_member, + } + } else { + Depends::Extension(s) + } + } +} + +#[derive(Debug, Default)] +pub struct Require { + pub depends: Vec, + pub enum_variants: Vec, + pub bitpositions: Vec, + pub constants: Vec, + pub types: Vec, + pub commands: Vec, +} + +impl Require { + fn from_node(node: Node, api: &str) -> Require { + let mut value = Require { + depends: attribute(node, "depends") + .map(|value| (value.split(',').map(Depends::from_str)).collect()) + .unwrap_or_default(), + ..Default::default() + }; + + for child in node.children().filter(|node| api_matches(node, api)) { + match child.tag_name().name() { + "enum" => { + if child.has_attribute("offset") { + value + .enum_variants + .push(RequireEnumVariant::from_node(child)); + } else if child.has_attribute("bitpos") { + value.bitpositions.push(RequireBitPos::from_node(child)); + } else { + value.constants.push(RequireConstant::from_node(child)); + } + } + "type" => value.types.push(RequireType::from_node(child)), + "command" => value.commands.push(RequireCommand::from_node(child)), + _ => (), + } + } + + value + } +} + +#[derive(Debug)] +pub struct Feature { + pub name: &'static str, + pub version: Version, + pub depends: Vec, + pub requires: Vec, +} + +impl Feature { + fn from_node(node: Node, api: &str) -> Feature { + let name = attribute(node, "name").unwrap(); + + Feature { + version: Version::from_str(name).unwrap(), + name, + depends: attribute(node, "depends") + .map(|value| (value.split(',').map(Depends::from_str)).collect()) + .unwrap_or_default(), + requires: node + .children() + .filter(|child| child.has_tag_name("require")) + .filter(|node| api_matches(node, api)) + .map(|child| Require::from_node(child, api)) + .collect(), + } + } +} + +#[derive(Debug)] +pub struct Extension { + pub name: &'static str, + pub number: Option, + pub ty: Option<&'static str>, + pub requires: Vec, +} + +impl Extension { + fn from_node(node: Node, api: &str) -> Extension { + Extension { + name: attribute(node, "name").unwrap(), + number: attribute(node, "number").map(|value| value.parse().unwrap()), + ty: attribute(node, "type"), + requires: node + .children() + .filter(|child| child.has_tag_name("require")) + .filter(|node| api_matches(node, api)) + .map(|child| Require::from_node(child, api)) + .collect(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tracing_test::traced_test; + + #[test] + #[traced_test] + fn vk_xml() { + let xml_input = Box::leak( + std::fs::read_to_string("../generator-rewrite/Vulkan-Headers/registry/vk.xml") + .unwrap() + .into_boxed_str(), + ); + + Registry::parse(xml_input, "vulkan"); + } + + #[test] + #[traced_test] + fn video_xml() { + let xml_input = Box::leak( + std::fs::read_to_string("../generator-rewrite/Vulkan-Headers/registry/video.xml") + .unwrap() + .into_boxed_str(), + ); + + Registry::parse(xml_input, "vulkan"); + } +} diff --git a/generator-rewrite/Cargo.toml b/generator-rewrite/Cargo.toml index 08e458ac0..875badb00 100644 --- a/generator-rewrite/Cargo.toml +++ b/generator-rewrite/Cargo.toml @@ -6,3 +6,5 @@ publish = false [dependencies] analysis = { path = "../analysis" } +tracing = "0.1" +tracing-subscriber = "0.3" diff --git a/generator-rewrite/Vulkan-Headers b/generator-rewrite/Vulkan-Headers new file mode 160000 index 000000000..2fa203425 --- /dev/null +++ b/generator-rewrite/Vulkan-Headers @@ -0,0 +1 @@ +Subproject commit 2fa203425eb4af9dfc6b03f97ef72b0b5bcb8350 diff --git a/generator-rewrite/src/main.rs b/generator-rewrite/src/main.rs index 0d35dbe2f..6cd165947 100644 --- a/generator-rewrite/src/main.rs +++ b/generator-rewrite/src/main.rs @@ -1,5 +1,7 @@ use analysis::Analysis; fn main() { - let _analysis = Analysis::new("generator/Vulkan-Headers"); + tracing_subscriber::fmt::init(); + let _analysis = Analysis::new("generator-rewrite/Vulkan-Headers"); + // dbg!(_analysis); }