diff --git a/.gitignore b/.gitignore index 25062cc..8089f12 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ zig-pkg .claude CLAUDE.md *.log +*.wav diff --git a/LICENSE.zitrus b/LICENSE.zitrus new file mode 100644 index 0000000..f18b392 --- /dev/null +++ b/LICENSE.zitrus @@ -0,0 +1,9 @@ +This project's 3DS backend uses components from Zitrus: + +Copyright © 2025 GasInfinity + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md index 6d491e4..946f511 100644 --- a/README.md +++ b/README.md @@ -67,11 +67,6 @@ pub fn build(b: *std.Build) void { .overrides = overrides, }); - // Compile a Slang shader for the selected backend and embed it at compile time - Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ - .slang = b.path("shaders/basic.slang"), - }); - // Export the artifact (produces EBOOT.PBP for PSP, install artifact otherwise) Aether.exportArtifact(ae_dep.builder, b, exe, config, .{ .title = "My Game", @@ -83,7 +78,7 @@ pub fn build(b: *std.Build) void { } ``` -The first argument to `addGame`, `addShader`, and `exportArtifact` is the +The first argument to `addGame` and `exportArtifact` is the dependency's builder (`ae_dep.builder`), and the second is your project's builder (`b`). This lets Aether resolve its own internal dependencies (GLFW, Vulkan, Slang, pspsdk) from its `build.zig.zon` while building artifacts that @@ -117,11 +112,12 @@ const MyState = struct { }; pub fn main(init: std.process.Init) !void { - const memory = try init.arena.allocator().alloc(u8, 32 * 1024 * 1024); + const memory = try init.gpa.alignedAlloc(u8, .fromByteUnits(16), 32 * 1024 * 1024); + defer init.gpa.free(memory); var my_state: MyState = undefined; var engine: ae.Engine = undefined; - try engine.init(init.io, memory, .{ + try engine.init(init.io, init.environ_map, memory, .{ .memory = .{ .render = 8 * 1024 * 1024, .audio = 2 * 1024 * 1024, @@ -159,10 +155,17 @@ zig build run -Dgfx=opengl # Build for PSP zig build -Dtarget=mipsel-psp +# Build for 3DS +zig build -Dtarget=arm-3ds-eabihf + # Build in release mode zig build -Doptimize=ReleaseFast ``` +3DS builds default to a 4 MiB regular libctru/newlib heap and a 60 MiB +linear heap. Aether's process allocator uses linear memory on 3DS, so engine +pool allocations and GPU upload buffers come from the same memory class. 
+ ## Input System Actions are registered by name and bound to one or more input sources: @@ -190,11 +193,9 @@ const Vertex = struct { }); pub const Layout = Rendering.Pipeline.layout_from_struct(@This(), &Attributes); }; - -const MyMesh = Rendering.Mesh(Vertex); ``` -Shaders are written in [Slang](https://shader-slang.com/) (`.slang` files), compiled at build time via `addShader`, and embedded into the binary. Vulkan consumes SPIR-V; OpenGL consumes GLSL 4.50. PSP targets ignore shaders entirely (fixed-function pipeline); the build system generates empty stubs. +Aether owns its built-in pipeline shaders as backend internals. Downstream games create pipelines from vertex layouts; the selected backend compiles and embeds the shader code it needs. ## Build API Reference @@ -210,15 +211,6 @@ Creates a game executable with the engine module and platform dependencies wired | `optimize` | `OptimizeMode` | Optimization level (default: `.Debug`) | | `overrides` | `Config.Overrides` | Graphics/display mode overrides (default: `.{}`) | -### `Aether.addShader(owner, b, exe, config, name, paths)` - -Compiles a Slang shader for the selected backend and embeds it into the executable. Vulkan gets SPIR-V, OpenGL gets GLSL 4.50, and PSP targets get empty stubs. - -| Option | Type | Description | -|--------|------|-------------| -| `name` | `[]const u8` | Shader name (used for `@embedFile` lookup) | -| `paths.slang` | `LazyPath` | Path to the `.slang` source file | - ### `Aether.exportArtifact(owner, b, exe, config, opts)` Exports the build artifact. For PSP targets, produces an `EBOOT.PBP`. For desktop, installs the artifact normally. @@ -233,7 +225,7 @@ Exports the build artifact. For PSP targets, produces an `EBOOT.PBP`. For deskto ### `Aether.Config.resolve(target, overrides) -> Config` -Resolves the full engine configuration (platform, graphics backend, audio, input) from the build target and any user overrides. Pass the result to `addShader` and `exportArtifact`. 
+Resolves the full engine configuration (platform, graphics backend, audio, input) from the build target and any user overrides. Pass the result to `exportArtifact`. ### `Aether.Config.Overrides` diff --git a/build.zig b/build.zig index 03b5eb5..ed712c0 100644 --- a/build.zig +++ b/build.zig @@ -1,17 +1,32 @@ const std = @import("std"); const pspsdk = @import("pspsdk"); +const DEFAULT_3DS_HEAP_SIZE: u32 = 4 * 1024 * 1024; +const DEFAULT_3DS_LINEAR_HEAP_SIZE: u32 = 60 * 1024 * 1024; + pub const Platform = enum { windows, linux, macos, + wasm, psp, + /// Nintendo 3DS. Builtin os tag is `.@"3ds"`, but the Zig options + /// serializer can't emit `.@"3ds"` as an enum value literal, so the + /// internal Aether tag uses a leading-letter form. + nintendo_3ds, + /// Nintendo Switch. Zig 0.16 has no `switch`/`horizon` OS tag, so the + /// canonical target is `aarch64-freestanding-none` and we can't infer + /// the platform from `target.os.tag` alone. Opt in with + /// `-Dnintendo-switch=true`; `Config.resolve` then promotes a + /// freestanding aarch64 target to this variant. + nintendo_switch, }; pub const Gfx = enum { default, opengl, vulkan, + webgl, headless, }; @@ -39,6 +54,10 @@ pub const Config = struct { /// levels for the texture. Off by default since the extra VRAM cost /// only pays off for textures sampled at a wide range of distances. psp_mipmaps: bool = false, + /// 3DS: small regular heap reserved for libctru/newlib internals. + nintendo_3ds_heap_size: u32 = DEFAULT_3DS_HEAP_SIZE, + /// 3DS: linear heap used by Aether's process allocator and GPU uploads. + nintendo_3ds_linear_heap_size: u32 = DEFAULT_3DS_LINEAR_HEAP_SIZE, /// When true, `Core.paths.resolve` returns CWD for both resources /// and data, bypassing the platform-specific layout (.app Resources /// on mac, APPDATA on Windows, XDG on Linux). @@ -52,32 +71,43 @@ pub const Config = struct { /// - Debug builds of unpackaged binaries on macOS, where resources /// aren't inside a .app yet. 
use_cwd: bool = false, + /// Flush the file log after every message. Useful for diagnosing hard + /// hangs on consoles where normal shutdown never reaches logger.deinit. + flush_logs: bool = false, pub fn resolve(target: std.Build.ResolvedTarget, overrides: Overrides) Config { - const plat: Platform = switch (target.result.os.tag) { - .windows => .windows, - .macos => .macos, - .linux => .linux, - .psp => .psp, - else => |t| { - std.debug.panic("Unsupported OS! {}\n", .{t}); - }, + const plat: Platform = blk: { + if (overrides.nintendo_switch == true) { + if (target.result.cpu.arch != .aarch64 or target.result.os.tag != .freestanding) { + std.debug.panic( + "-Dnintendo-switch=true requires -Dtarget=aarch64-freestanding-none (got {s}-{s})\n", + .{ @tagName(target.result.cpu.arch), @tagName(target.result.os.tag) }, + ); + } + break :blk .nintendo_switch; + } + break :blk switch (target.result.os.tag) { + .windows => .windows, + .macos => .macos, + .linux => .linux, + .wasi => .wasm, + .psp => .psp, + .@"3ds" => .nintendo_3ds, + else => |t| { + std.debug.panic("Unsupported OS! {}\n", .{t}); + }, + }; }; const default_gfx: Gfx = switch (target.result.os.tag) { .windows => .vulkan, .macos => .vulkan, .linux => .vulkan, + .wasi => .webgl, else => .default, }; - // macOS default is `.none` because the current miniaudio build is - // bugged there. Flip back to `.default` with `-Daudio=default` once - // that's fixed. 
- const default_audio: Audio = switch (target.result.os.tag) { - .macos => .none, - else => .default, - }; + const default_audio: Audio = .default; return .{ .platform = plat, @@ -85,7 +115,10 @@ pub const Config = struct { .audio = overrides.audio orelse default_audio, .psp_display_mode = overrides.psp_display_mode orelse .rgba8888, .psp_mipmaps = overrides.psp_mipmaps orelse false, + .nintendo_3ds_heap_size = overrides.nintendo_3ds_heap_size orelse DEFAULT_3DS_HEAP_SIZE, + .nintendo_3ds_linear_heap_size = overrides.nintendo_3ds_linear_heap_size orelse DEFAULT_3DS_LINEAR_HEAP_SIZE, .use_cwd = overrides.use_cwd orelse false, + .flush_logs = overrides.flush_logs orelse false, }; } @@ -94,7 +127,13 @@ pub const Config = struct { audio: ?Audio = null, psp_display_mode: ?PspDisplayMode = null, psp_mipmaps: ?bool = null, + nintendo_3ds_heap_size: ?u32 = null, + nintendo_3ds_linear_heap_size: ?u32 = null, use_cwd: ?bool = null, + flush_logs: ?bool = null, + /// Promotes an `aarch64-freestanding-none` target to the + /// `nintendo_switch` platform. No effect when null/false. + nintendo_switch: ?bool = null, }; }; @@ -114,10 +153,17 @@ pub const HeadlessOptions = struct { overrides: Config.Overrides = .{}, }; -pub const ShaderPaths = struct { - slang: std.Build.LazyPath, +const ShaderStagePaths = struct { + vert: std.Build.LazyPath, + frag: std.Build.LazyPath, }; +const user_root_import_name = "aether_user_root"; + +pub fn userRootModule(exe: *std.Build.Step.Compile) *std.Build.Module { + return exe.root_module.import_table.get(user_root_import_name) orelse exe.root_module; +} + // Cached per-build user options. b.option panics on second declaration, so // these getters declare once and memoize. Accessed from both addGame (for // linking) and exportArtifact (for bundle packaging). 
Module-level mutable @@ -141,6 +187,79 @@ fn macosGlfwPath(b: *std.Build) []const u8 { return p; } +var devkitpro_path_cached: ?[]const u8 = null; +fn devkitProPath(b: *std.Build) []const u8 { + if (devkitpro_path_cached) |p| return p; + const opt = b.option([]const u8, "devkitpro-path", "3DS: devkitPro install root (default: $DEVKITPRO or /opt/devkitpro)"); + const p = opt orelse b.graph.environ_map.get("DEVKITPRO") orelse "/opt/devkitpro"; + devkitpro_path_cached = p; + return p; +} + +var spirv_cross_path_cached: ?[]const u8 = null; +fn spirvCrossPath(b: *std.Build) []const u8 { + if (spirv_cross_path_cached) |p| return p; + const p = b.option([]const u8, "spirv-cross-path", "WASM/browser: spirv-cross executable path (default: spirv-cross)") orelse "spirv-cross"; + spirv_cross_path_cached = p; + return p; +} + +pub fn webTarget(b: *std.Build) std.Build.ResolvedTarget { + return b.resolveTargetQuery(.{ + .cpu_arch = .wasm32, + .os_tag = .wasi, + .abi = .musl, + .cpu_features_add = std.Target.wasm.featureSet(&.{ .atomics, .bulk_memory }), + }); +} + +fn addNintendoCImportPaths(owner: *std.Build, mod: *std.Build.Module, config: Config, dkp: []const u8) void { + const b = mod.owner; + mod.addIncludePath(owner.path("src/platform")); + switch (config.platform) { + .nintendo_3ds => { + // Keep newlib before libctru so libctru's include_next sys wrappers + // resolve during Zig's C translation of SDK headers. + // + // Zig's 3DS C import can otherwise see newlib's fortified unistd + // wrappers and emit references to __ssp_real_* symbols. devkitARM is + // built without libssp, so keep fortify off for translated SDK calls. 
+ mod.addCMacro("_FORTIFY_SOURCE", "0"); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); + }, + .nintendo_switch => { + // Zig's Switch C import can otherwise see newlib's fortified + // wrappers and emit references to __ssp_real_* symbols. + mod.addCMacro("_FORTIFY_SOURCE", "0"); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitA64/aarch64-none-elf/include" }) }); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libnx/include" }) }); + }, + else => {}, + } +} + +/// Creates a `3dslink` command for pushing an installed `.3dsx` to a +/// networked 3DS. Reuses Aether's devkitPro option/cache so downstream +/// builds do not need to redeclare `-Ddevkitpro-path`. +pub fn add3dslink(b: *std.Build, threedsx_path: []const u8) *std.Build.Step.Run { + const dkp = devkitProPath(b); + const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/3dslink" })}); + if (b.option([]const u8, "3dslink-address", "3DS: target IP for 3dslink push (default: mDNS auto-discover)")) |ip| { + link_cmd.addArgs(&.{ "-a", ip }); + } + if (b.option(u32, "3dslink-retries", "3DS: 3dslink retry count (default: 10)")) |n| { + link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); + } + if (b.option(bool, "3dslink-server", "3DS: pass -s so 3dslink stays listening after the upload (useful for some Rosalina versions and for stdout relay)") orelse false) { + link_cmd.addArg("-s"); + } + link_cmd.addArg(threedsx_path); + link_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| link_cmd.addArgs(args); + return link_cmd; +} + /// Creates an executable with the Aether engine module and all platform /// dependencies wired up. Returns the compile step so the caller can /// further customize it (install, add run steps, etc.). @@ -156,6 +275,17 @@ fn macosGlfwPath(b: *std.Build) []const u8 { /// the actual build steps and executable. 
pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.Step.Compile { const config = Config.resolve(opts.target, opts.overrides); + const uses_nintendo_c_io = config.platform == .nintendo_3ds or config.platform == .nintendo_switch; + + // 3DS and Switch force ofmt=c — there's no Zig-native backend for + // either Horizon target yet, so we emit C and let an external + // toolchain (devkitARM/libctru on 3DS, devkitA64/libnx on Switch) + // compile the result. + const target = if (uses_nintendo_c_io) blk: { + var q = opts.target.query; + q.ofmt = .c; + break :blk b.resolveTargetQuery(q); + } else opts.target; const options = b.addOptions(); options.addOption(Config, "config", config); @@ -163,7 +293,8 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), - .target = opts.target, + .target = target, + .link_libc = if (uses_nintendo_c_io) true else null, .imports = &.{ .{ .name = "options", .module = options_module }, }, @@ -171,20 +302,27 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S // --- platform-specific engine dependencies --- const psp_dep = if (config.platform == .psp) owner.dependency("pspsdk", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }) else null; if (psp_dep) |pd| { mod.addImport("pspsdk", pd.module("pspsdk")); + } else if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) { + // Console SDK symbols are declared as backend-local externs and + // resolved by the export pipeline's devkitPro link step. + } else if (config.platform == .wasm) { + // Browser builds use host imports for WebGL/Web Audio/input and WASI + // imports for files, clocks, stdio, random, and environment. They do + // not link desktop windowing/audio dependencies. 
} else { const zglfw = owner.dependency("zglfw", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }); const glfw = owner.dependency("glfw_zig", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }); @@ -203,15 +341,33 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S mod.addImport("vulkan", vulkan); if (config.audio != .none) { - const zaudio_dep = owner.dependency("zaudio", .{ - .target = opts.target, + const sdl3_dep = owner.lazyDependency("sdl3", .{ + .target = target, .optimize = opts.optimize, - }); - mod.addImport("zaudio", zaudio_dep.module("root")); - mod.linkLibrary(zaudio_dep.artifact("miniaudio")); + .main = false, + .ext_image = false, + .ext_net = false, + .ext_ttf = false, + // Static SDL3 and static GLFW both embed generated Wayland + // protocol objects on Linux. Keep SDL dynamic there since + // Aether only uses its audio subsystem. + .c_sdl_preferred_linkage = @as( + std.builtin.LinkMode, + if (target.result.os.tag == .linux) .dynamic else .static, + ), + }) orelse @panic("sdl3 dependency is required when desktop audio is enabled"); + mod.addImport("sdl3", sdl3_dep.module("sdl3")); + + if (target.result.os.tag == .linux) { + mod.addRPathSpecial("$ORIGIN"); + const install_sdl3 = b.addInstallArtifact(sdl3_dep.artifact("SDL3"), .{ + .dest_dir = .{ .override = .bin }, + }); + b.getInstallStep().dependOn(&install_sdl3.step); + } } - if (opts.target.result.os.tag == .macos) { + if (target.result.os.tag == .macos) { // Link MoltenVK directly as the Vulkan ICD -- no loader. 
Feeds // its vkGetInstanceProcAddr into GLFW via glfwInitVulkanLoader // in platform/glfw/surface.zig so GLFW doesn't dlopen libvulkan @@ -236,26 +392,54 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S } } + if (uses_nintendo_c_io) { + addNintendoCImportPaths(owner, mod, config, devkitProPath(b)); + } + if (config.platform == .nintendo_3ds and config.gfx == .default) { + add3dsMangoImport(owner, b, mod, target, opts.optimize); + } + + addInternalShaderModule(owner, b, mod, config); + // --- user executable --- + const user_mod = b.createModule(.{ + .root_source_file = opts.root_source_file, + .target = target, + .optimize = opts.optimize, + .strip = if (config.platform == .psp) false else null, + .link_libc = if (uses_nintendo_c_io) true else null, + .imports = &.{ + .{ .name = "aether", .module = mod }, + }, + }); + + const root_mod = if (uses_nintendo_c_io) b.createModule(.{ + .root_source_file = owner.path(switch (config.platform) { + .nintendo_3ds => "src/platform/3ds/services.zig", + .nintendo_switch => "src/platform/switch/services.zig", + else => unreachable, + }), + .target = target, + .optimize = opts.optimize, + .link_libc = true, + .imports = &.{ + .{ .name = "aether", .module = mod }, + .{ .name = user_root_import_name, .module = user_mod }, + .{ .name = "options", .module = options_module }, + }, + }) else user_mod; + const exe = b.addExecutable(.{ .name = opts.name, - .root_module = b.createModule(.{ - .root_source_file = opts.root_source_file, - .target = opts.target, - .optimize = opts.optimize, - .strip = if (config.platform == .psp) false else null, - .imports = &.{ - .{ .name = "aether", .module = mod }, - }, - }), + .root_module = root_mod, }); if (psp_dep) |pd| { // Inline PSP config -- pspsdk.configurePspExecutable uses // dependencyFromBuildZig on exe.step.owner which fails when // the exe is owned by a downstream builder. 
- if (exe.root_module.import_table.get("pspsdk") == null) { - exe.root_module.addImport("pspsdk", mod.import_table.get("pspsdk").?); + if (userRootModule(exe).import_table.get("pspsdk") == null) { + userRootModule(exe).addImport("pspsdk", mod.import_table.get("pspsdk").?); } exe.link_eh_frame_hdr = true; exe.link_emit_relocs = true; @@ -267,6 +451,21 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S exe.subsystem = .windows; } + if (uses_nintendo_c_io) { + // The platform shim exports C `main` itself. Keeping std/start's + // libc main wrapper disabled avoids pulling in unsupported + // freestanding libc/thread startup paths while still preserving the + // exported shim in the emitted C. + exe.entry = .disabled; + } else if (config.platform == .wasm) { + exe.entry = .disabled; + exe.rdynamic = true; + exe.wasi_exec_model = .reactor; + exe.shared_memory = true; + exe.initial_memory = 64 * 1024 * 1024; + exe.max_memory = 256 * 1024 * 1024; + } + return exe; } @@ -285,6 +484,14 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std var config = Config.resolve(opts.target, opts.overrides); config.gfx = .headless; config.audio = .none; + const uses_nintendo_c_io = config.platform == .nintendo_3ds or config.platform == .nintendo_switch; + + // 3DS and Switch force ofmt=c (see addGame for details). 
+ const target = if (uses_nintendo_c_io) blk: { + var q = opts.target.query; + q.ofmt = .c; + break :blk b.resolveTargetQuery(q); + } else opts.target; const options = b.addOptions(); options.addOption(Config, "config", config); @@ -292,14 +499,15 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), - .target = opts.target, + .target = target, + .link_libc = if (uses_nintendo_c_io) true else null, .imports = &.{ .{ .name = "options", .module = options_module }, }, }); const psp_dep = if (config.platform == .psp) owner.dependency("pspsdk", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }) else null; @@ -307,22 +515,45 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std mod.addImport("pspsdk", pd.module("pspsdk")); } + if (uses_nintendo_c_io) { + addNintendoCImportPaths(owner, mod, config, devkitProPath(b)); + } + + const user_mod = b.createModule(.{ + .root_source_file = opts.root_source_file, + .target = target, + .optimize = opts.optimize, + .strip = if (config.platform == .psp) false else null, + .link_libc = if (uses_nintendo_c_io) true else null, + .imports = &.{ + .{ .name = "aether", .module = mod }, + }, + }); + + const root_mod = if (uses_nintendo_c_io) b.createModule(.{ + .root_source_file = owner.path(switch (config.platform) { + .nintendo_3ds => "src/platform/3ds/services.zig", + .nintendo_switch => "src/platform/switch/services.zig", + else => unreachable, + }), + .target = target, + .optimize = opts.optimize, + .link_libc = true, + .imports = &.{ + .{ .name = "aether", .module = mod }, + .{ .name = user_root_import_name, .module = user_mod }, + .{ .name = "options", .module = options_module }, + }, + }) else user_mod; + const exe = b.addExecutable(.{ .name = opts.name, - .root_module = b.createModule(.{ - .root_source_file = opts.root_source_file, - .target = opts.target, - 
.optimize = opts.optimize, - .strip = if (config.platform == .psp) false else null, - .imports = &.{ - .{ .name = "aether", .module = mod }, - }, - }), + .root_module = root_mod, }); if (psp_dep) |pd| { - if (exe.root_module.import_table.get("pspsdk") == null) { - exe.root_module.addImport("pspsdk", mod.import_table.get("pspsdk").?); + if (userRootModule(exe).import_table.get("pspsdk") == null) { + userRootModule(exe).addImport("pspsdk", mod.import_table.get("pspsdk").?); } exe.link_eh_frame_hdr = true; exe.link_emit_relocs = true; @@ -330,6 +561,17 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std exe.setLinkerScript(pd.path("tools/linkfile.ld")); } + if (uses_nintendo_c_io) { + exe.entry = .disabled; + } else if (config.platform == .wasm) { + exe.entry = .disabled; + exe.rdynamic = true; + exe.wasi_exec_model = .reactor; + exe.shared_memory = true; + exe.initial_memory = 64 * 1024 * 1024; + exe.max_memory = 256 * 1024 * 1024; + } + return exe; } @@ -356,10 +598,13 @@ fn slangcPath(owner: *std.Build) ?std.Build.LazyPath { return dep.path(exe_name); } -fn addSlangStep(b: *std.Build, slangc: ?std.Build.LazyPath, args: []const []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) ?std.Build.LazyPath { - const sc = slangc orelse return null; +fn requireSlangcPath(owner: *std.Build) std.Build.LazyPath { + return slangcPath(owner) orelse @panic("slangc dependency unavailable; run zig build --fetch"); +} + +fn addSlangStep(b: *std.Build, slangc: std.Build.LazyPath, args: []const []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { const run = std.Build.Step.Run.create(b, "slangc " ++ output_name); - run.addFileArg(sc); + run.addFileArg(slangc); run.addArgs(args); run.addArg("-o"); const output = run.addOutputFileArg(output_name); @@ -367,6 +612,202 @@ fn addSlangStep(b: *std.Build, slangc: ?std.Build.LazyPath, args: []const []cons return output; } +fn addSpirvCrossStep(b: 
*std.Build, spirv_cross: []const u8, args: []const []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const run = b.addSystemCommand(&.{spirv_cross}); + run.setName("spirv-cross " ++ output_name); + run.addFileArg(input); + run.addArgs(args); + run.addArg("--output"); + return run.addOutputFileArg(output_name); +} + +fn addUamStep(b: *std.Build, uam: []const u8, stage: []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const run = b.addSystemCommand(&.{ uam, "-s", stage, "-o" }); + const output = run.addOutputFileArg(output_name); + run.addFileArg(input); + return output; +} + +fn addPicassoStep(b: *std.Build, picasso: []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const run = b.addSystemCommand(&.{ picasso, "-o" }); + const output = run.addOutputFileArg(output_name); + run.addFileArg(input); + return output; +} + +fn add3dsMangoImport(owner: *std.Build, b: *std.Build, mod: *std.Build.Module, target: std.Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) void { + const zsflt = owner.dependency("zsflt", .{}).module("zsflt"); + const dkp = devkitProPath(owner); + const zitrus_mod = b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/zitrus.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "zsflt", .module = zsflt }, + }, + }); + zitrus_mod.addImport("zitrus", zitrus_mod); + zitrus_mod.addIncludePath(owner.path("src/platform")); + zitrus_mod.addCMacro("_FORTIFY_SOURCE", "0"); + zitrus_mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); + zitrus_mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); + mod.addImport("zitrus", zitrus_mod); +} + +fn addMangoPsmStep(owner: *std.Build, b: *std.Build, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const zsflt = 
owner.dependency("zsflt", .{}).module("zsflt"); + const host_target = b.resolveTargetQuery(.{}); + const zitrus_mod = b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/zitrus_tools.zig"), + .target = host_target, + .optimize = .Debug, + .imports = &.{ + .{ .name = "zsflt", .module = zsflt }, + }, + }); + zitrus_mod.addImport("zitrus", zitrus_mod); + + const tool = b.addExecutable(.{ + .name = "aether-psm-to-zpsh", + .root_module = b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/tools/psm_to_zpsh.zig"), + .target = host_target, + .optimize = .Debug, + .imports = &.{ + .{ .name = "zitrus", .module = zitrus_mod }, + }, + }), + }); + + const run = b.addRunArtifact(tool); + run.setName("assemble 3ds mango shader"); + run.addFileArg(input); + return run.addOutputFileArg(output_name); +} + +fn addInternalShaderModule(owner: *std.Build, b: *std.Build, mod: *std.Build.Module, config: Config) void { + const stages = internalShaderStages(owner, b, config) orelse return; + + const files = b.addWriteFiles(); + _ = files.addCopyFile(stages.vert, "basic.vert"); + _ = files.addCopyFile(stages.frag, "basic.frag"); + const root = files.add("aether_shaders.zig", + \\pub const basic_vert align(@alignOf(u32)) = @embedFile("basic.vert").*; + \\pub const basic_frag align(@alignOf(u32)) = @embedFile("basic.frag").*; + \\ + ); + + mod.addImport("aether_shaders", b.createModule(.{ + .root_source_file = root, + })); +} + +fn internalShaderStages(owner: *std.Build, b: *std.Build, config: Config) ?ShaderStagePaths { + if (config.platform == .nintendo_3ds and config.gfx == .default) { + const vert = addMangoPsmStep(owner, b, "basic.psh", owner.path("src/platform/3ds/shaders/basic.psm")); + const files = b.addWriteFiles(); + const frag = files.add("basic.frag.stub", ""); + return .{ .vert = vert, .frag = frag }; + } + + if (config.platform == .nintendo_switch and config.gfx == .default) { + const uam = b.pathJoin(&.{ devkitProPath(b), 
"tools/bin/uam" }); + const slangc = requireSlangcPath(owner); + const source = owner.path("src/rendering/shaders/basic.slang"); + const vert_glsl = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "vertexMain", "-stage", "vertex", + }, "basic.vert.switch.glsl", source); + const frag_glsl = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "fragmentMain", "-stage", "fragment", + }, "basic.frag.switch.glsl", source); + return .{ + .vert = addUamStep( + b, + uam, + "vert", + "basic.vert.dksh", + vert_glsl, + ), + .frag = addUamStep( + b, + uam, + "frag", + "basic.frag.dksh", + frag_glsl, + ), + }; + } + + switch (config.gfx) { + .vulkan => { + const slangc = requireSlangcPath(owner); + const source = owner.path("src/rendering/shaders/basic.slang"); + return .{ + .vert = addSlangStep(b, slangc, &.{ + "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", + "-DVULKAN", "-entry", "vertexMain", "-stage", + "vertex", + }, "basic.vert.spv", source), + .frag = addSlangStep(b, slangc, &.{ + "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", + "-DVULKAN", "-entry", "fragmentMain", "-stage", + "fragment", + }, "basic.frag.spv", source), + }; + }, + .opengl => { + const slangc = requireSlangcPath(owner); + const source = owner.path("src/rendering/shaders/basic.slang"); + return .{ + .vert = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "vertexMain", "-stage", "vertex", + }, "basic.vert.glsl", source), + .frag = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "fragmentMain", "-stage", "fragment", + }, "basic.frag.glsl", source), + }; + }, + .webgl => { + const slangc = requireSlangcPath(owner); + const spirv_cross = spirvCrossPath(b); + const source = 
owner.path("src/rendering/shaders/basic.slang"); + const vert_spv = addSlangStep(b, slangc, &.{ + "-entry", "vertexMain", "-stage", "vertex", + "-profile", "glsl_330", "-emit-spirv-via-glsl", "-matrix-layout-column-major", + }, "basic.vert.webgl.spv", source); + const frag_spv = addSlangStep(b, slangc, &.{ + "-entry", "fragmentMain", "-stage", "fragment", + "-profile", "glsl_330", "-emit-spirv-via-glsl", "-matrix-layout-column-major", + }, "basic.frag.webgl.spv", source); + return .{ + .vert = addSpirvCrossStep(b, spirv_cross, &.{ + "--es", "--version", "300", + "--rename-interface-variable", "out", "0", + "v_uv", "--rename-interface-variable", "out", + "1", "v_color", "--rename-interface-variable", + "out", "2", "v_viewDepth", + }, "basic.vert.webgl.glsl", vert_spv), + .frag = addSpirvCrossStep(b, spirv_cross, &.{ + "--es", "--version", "300", + "--rename-interface-variable", "in", "0", + "v_uv", "--rename-interface-variable", "in", + "1", "v_color", "--rename-interface-variable", + "in", "2", "v_viewDepth", + }, "basic.frag.webgl.glsl", frag_spv), + }; + }, + .default, .headless => return null, + } +} + pub const ExportOptions = struct { /// PSP/macOS: human-readable name shown to the OS (XMB title on PSP, /// CFBundleName on macOS). Ignored elsewhere. @@ -391,8 +832,41 @@ pub const ExportOptions = struct { icon_png: ?std.Build.LazyPath = null, /// Files to install into the app bundle. On macOS they land under /// `Contents/Resources/`. On desktop non-macOS they are copied - /// alongside the exe in `zig-out/bin/`. Ignored on PSP. + /// alongside the exe in `zig-out/bin/`. Ignored on PSP and 3DS. resources: []const Resource = &.{}, + /// WASM/browser: directory copied into the web artifact root and exposed + /// through `resources.manifest` for the JavaScript WASI preloader. + web_resources: ?std.Build.LazyPath = null, + /// WASM/browser: individual files copied into the web artifact root. 
+ web_resource_files: []const Resource = &.{}, + /// WASM/browser: newline-delimited resource paths relative to + /// `web_resources`. + web_resource_manifest: []const u8 = "", + /// WASM/browser: destination wasm filename. Defaults to the name expected + /// by the stock Aether web loader. + web_wasm_name: []const u8 = "Aether.wasm", + /// 3DS: SMDH long description (the second line shown in the HOME + /// menu detail panel). Falls back to "Built with Aether" when empty. + smdh_long_description: []const u8 = "", + /// 3DS: SMDH author string. Empty leaves the field blank. + smdh_author: []const u8 = "", + /// 3DS: 48x48 PNG icon embedded in the SMDH. When null, libctru's + /// `default_icon.png` is used. + smdh_icon: ?std.Build.LazyPath = null, + /// 3DS: directory (or pre-built `.romfs`) embedded into the 3DSX. + romfs: ?std.Build.LazyPath = null, + /// Switch: NACP author string (shows under the title in the HOME + /// menu). Empty falls back to "Aether". + switch_author: []const u8 = "", + /// Switch: NACP version string (e.g. "1.0.0"). Empty falls back to + /// "1.0.0". + switch_version: []const u8 = "", + /// Switch: 256x256 JPEG icon embedded in the NRO. When null, libnx's + /// `default_icon.jpg` is used. + switch_icon: ?std.Build.LazyPath = null, + /// Switch: directory embedded into the NRO as RomFS. When null, no + /// RomFS is attached. + switch_romfs: ?std.Build.LazyPath = null, pub const Resource = struct { /// Source file to copy. @@ -416,6 +890,13 @@ pub fn exportArtifact(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Com // register on the downstream project's builder. 
const psp_dep = owner.dependency("pspsdk", .{}); _ = pspEbootPipeline(b, exe, psp_dep, opts); + } else if (config.platform == .nintendo_3ds) { + threedsxPipeline(b, exe, opts); + } else if (config.platform == .nintendo_switch) { + switchNroPipeline(b, exe, opts); + } else if (config.platform == .wasm) { + const install = addWebBundle(owner, b, exe, opts); + b.getInstallStep().dependOn(&install.step); } else if (config.platform == .macos) { macosAppBundle(b, exe, opts); } else { @@ -427,6 +908,50 @@ pub fn exportArtifact(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Com } } +pub fn addWebBundle(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOptions) *std.Build.Step.InstallDir { + const web = b.addWriteFiles(); + _ = web.addCopyFile(exe.getEmittedBin(), opts.web_wasm_name); + _ = web.addCopyFile(owner.path("web/index.html"), "index.html"); + _ = web.addCopyFile(owner.path("web/aether.js"), "aether.js"); + _ = web.add("resources.manifest", opts.web_resource_manifest); + if (opts.web_resources) |resource_dir| { + _ = web.addCopyDirectory(resource_dir, "", .{}); + } + for (opts.web_resource_files) |res| { + _ = web.addCopyFile(res.path, res.name); + } + + return b.addInstallDirectory(.{ + .source_dir = web.getDirectory(), + .install_dir = .prefix, + .install_subdir = "web", + }); +} + +pub fn addServeWebStep( + owner: *std.Build, + b: *std.Build, + name: []const u8, + web_install: *std.Build.Step.InstallDir, + host: []const u8, + port: u16, +) *std.Build.Step.Run { + const serve_web_exe = b.addExecutable(.{ + .name = name, + .root_module = b.createModule(.{ + .root_source_file = owner.path("tools/serve_web.zig"), + .target = b.resolveTargetQuery(.{}), + .optimize = .Debug, + }), + }); + const serve_web_cmd = b.addRunArtifact(serve_web_exe); + serve_web_cmd.step.dependOn(&web_install.step); + serve_web_cmd.addArg(b.getInstallPath(.prefix, "web")); + serve_web_cmd.addArg(host); + serve_web_cmd.addArg(b.fmt("{d}", .{port})); + return 
serve_web_cmd; +} + /// Builds a `.app` directory under zig-out/bin/ with: /// Contents/MacOS/ -- patched load commands /// Contents/Frameworks/libMoltenVK.dylib -- id rewritten to @rpath @@ -660,73 +1185,495 @@ fn pspEbootPipeline(b: *std.Build, exe: *std.Build.Step.Compile, psp_dep: *std.B return result; } -/// Registers a shader pair for the game executable. Slang sources are -/// compiled to SPIR-V (Vulkan) or GLSL (OpenGL) via slangc. On -/// shaderless platforms (PSP), empty stubs are provided. -/// -/// When called from Aether's own build, use `addShader(b, b, ...)`. -/// From a downstream project: -/// -/// Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ ... }); -/// -pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, config: Config, comptime name: []const u8, paths: ShaderPaths) void { - switch (config.gfx) { - .vulkan => { - const slangc = slangcPath(owner); - const vert = addSlangStep(b, slangc, &.{ - "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", - "-DVULKAN", "-entry", "vertexMain", "-stage", - "vertex", - }, name ++ ".vert.spv", paths.slang); - const frag = addSlangStep(b, slangc, &.{ - "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", - "-DVULKAN", "-entry", "fragmentMain", "-stage", - "fragment", - }, name ++ ".frag.spv", paths.slang); - if (vert) |v| exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); - if (frag) |f| exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); - }, - .opengl => { - const slangc = slangcPath(owner); - const vert = addSlangStep(b, slangc, &.{ - "-target", "glsl", "-matrix-layout-column-major", - "-profile", "glsl_450", "-entry", - "vertexMain", "-stage", "vertex", - }, name ++ ".vert.glsl", paths.slang); - const frag = addSlangStep(b, slangc, &.{ - "-target", "glsl", "-matrix-layout-column-major", - "-profile", "glsl_450", "-entry", - "fragmentMain", "-stage", "fragment", - 
}, name ++ ".frag.glsl", paths.slang); - if (vert) |v| exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); - if (frag) |f| exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); - }, - .default, .headless => { - // Provide empty stubs so @embedFile(name ++ "_vert") still compiles. - const empty = b.addWriteFiles(); - const stub = empty.add(name ++ "_stub", ""); - exe.root_module.addAnonymousImport(name ++ "_vert", .{ - .root_source_file = stub, - }); - exe.root_module.addAnonymousImport(name ++ "_frag", .{ - .root_source_file = stub, - }); - }, +fn patch3dsGeneratedC(b: *std.Build, exe: *std.Build.Step.Compile) std.Build.LazyPath { + const patch = b.addSystemCommand(&.{ + "perl", "-e", + \\local $/; + \\my $src = <>; + \\my %align16 = (); + \\while ($src =~ /zig_static_assert\(_Alignof \(struct ([A-Za-z0-9_]+)\) == 16,/g) { + \\ $align16{$1} = 1; + \\} + \\my $pending = ""; + \\for my $line (split /(?<=\n)/, $src) { + \\ if ($pending ne "") { + \\ if ($line =~ s/^};/} __attribute__((aligned(16)));/) { + \\ $pending = ""; + \\ } + \\ } elsif ($line =~ /^struct\s+([A-Za-z0-9_]+)\s*\{/) { + \\ my $name = $1; + \\ if ($align16{$name}) { + \\ if ($line !~ s/\};/} __attribute__((aligned(16)));/) { + \\ $pending = $name; + \\ } + \\ } + \\ } + \\ print $line; + \\} + }); + patch.addArtifactArg(exe); + return patch.captureStdOut(.{ .basename = b.fmt("{s}.3ds.c", .{exe.name}) }); +} + +fn cBackendOptimizeMode(exe: *std.Build.Step.Compile) std.builtin.OptimizeMode { + return exe.root_module.optimize orelse .Debug; +} + +fn cBackendGccOptimizeArg(optimize: std.builtin.OptimizeMode) []const u8 { + return switch (optimize) { + .Debug => "-O0", + .ReleaseSafe, .ReleaseFast => "-O2", + .ReleaseSmall => "-Os", + }; +} + +fn cBackendGccDebugArg(optimize: std.builtin.OptimizeMode) []const u8 { + return if (optimize == .Debug or optimize == .ReleaseSafe) "-g" else "-g0"; +} + +/// Compiles the zig-emitted C with devkitARM, links 
against libctru, and +/// packages the ELF (plus an SMDH and optional RomFS) into a `.3dsx` +/// homebrew bundle. Mirrors `pspEbootPipeline` for the PSP toolchain. +fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOptions) void { + // Derive a sibling target for compiler_rt: same cpu/abi/endianness + // as the game (so the calling conventions and float ABI match + // libctru), but os=freestanding (sidesteps the 3DS-specific posix + // dependencies in std) and the default object format (so this + // module compiles natively to an ELF object the gcc driver can + // consume, rather than .c). devkitARM's libgcc.a doesn't ship the + // 128-bit-int compiler-rt entry points (`__multi3`/`__divti3`/etc.), + // so we provide them ourselves from zig's compiler_rt. + const game_target = exe.root_module.resolved_target.?; + var crt_query = game_target.query; + crt_query.os_tag = .freestanding; + crt_query.ofmt = null; + // Explicitly pin the cpu model to whatever the game target + // resolved to (arm.mpcore for the 3DS). Without this, swapping + // os_tag to .freestanding loses the os-derived cpu choice and + // zig falls back to a generic baseline that emits ARMv6T2+ + // instructions (e.g. `mls`) the ARMv6K MPCore doesn't decode — + // crashes show up as "undefined instruction" in compiler_rt + // helpers like `__udivmodsi4`. + crt_query.cpu_model = .{ .explicit = game_target.result.cpu.model }; + const crt_target = b.resolveTargetQuery(crt_query); + + const compiler_rt_path = b.pathJoin(&.{ + b.graph.zig_lib_directory.path orelse ".", + "compiler_rt.zig", + }); + const crt_obj = b.addObject(.{ + .name = "aether_3ds_compiler_rt", + .root_module = b.createModule(.{ + .root_source_file = .{ .cwd_relative = compiler_rt_path }, + .target = crt_target, + .optimize = .ReleaseSmall, + .strip = true, + }), + }); + + const dkp = devkitProPath(b); + + // Strip the libc-overlap symbols from the compiler_rt object. 
+ // zig's compiler_rt re-exports `memset`/`memcpy`/`memmove` and + // their `__aeabi_*` shims as WEAK; the `__aeabi_memset` and + // `memset` versions form a recursive `bl` cycle that blows the + // stack on 32-bit ARM. Newlib has real implementations, but the + // linker won't reach for them while compiler_rt's weak version + // already resolves the reference. `--localize-symbol` demotes the + // exports to object-local symbols, so they no longer satisfy outside + // references and the linker pulls newlib's implementations from libc.a. + const strip_libc = b.addSystemCommand(&.{ + b.pathJoin(&.{ dkp, "devkitARM/bin/arm-none-eabi-objcopy" }), + "--localize-symbol=memset", + "--localize-symbol=memcpy", + "--localize-symbol=memmove", + "--localize-symbol=memcmp", + "--localize-symbol=__memset", + "--localize-symbol=__memcpy", + "--localize-symbol=__memmove", + "--localize-symbol=__memcpy_chk", + "--localize-symbol=__aeabi_memset", + "--localize-symbol=__aeabi_memset4", + "--localize-symbol=__aeabi_memset8", + "--localize-symbol=__aeabi_memcpy", + "--localize-symbol=__aeabi_memcpy4", + "--localize-symbol=__aeabi_memcpy8", + "--localize-symbol=__aeabi_memmove", + "--localize-symbol=__aeabi_memmove4", + "--localize-symbol=__aeabi_memmove8", + "--localize-symbol=strlen", + "--localize-symbol=bcmp", + }); + strip_libc.addArtifactArg(crt_obj); + const crt_clean = strip_libc.addOutputFileArg("aether_3ds_compiler_rt.o"); + const gcc = b.pathJoin(&.{ dkp, "devkitARM/bin/arm-none-eabi-gcc" }); + const tool_3dsx = b.pathJoin(&.{ dkp, "tools/bin/3dsxtool" }); + const tool_smdh = b.pathJoin(&.{ dkp, "tools/bin/smdhtool" }); + const ctru_inc = b.pathJoin(&.{ dkp, "libctru/include" }); + const ctru_lib = b.pathJoin(&.{ dkp, "libctru/lib" }); + const default_icon = b.pathJoin(&.{ dkp, "libctru/default_icon.png" }); + const zig_h_src = b.pathJoin(&.{ b.graph.zig_lib_directory.path orelse ".", "zig.h" }); + + // zig.h hardcodes `zig_align(16)` for its `zig_i128`/`zig_u128` + // struct fallback (used
when `__int128` isn't supported by the C + // compiler -- gcc on 32-bit ARM is one such target). Zig's ARM + // layout uses 8-byte alignment for those integer types, while f128 + // still needs 16-byte alignment. Patch only the integer fallback + // typedefs, then route unsupported ARM f128 through zig.h's vector + // fallback with explicit 16-byte alignment. + const patch = b.addSystemCommand(&.{"perl"}); + patch.addArgs(&.{ + "-0pe", + \\s/typedef struct \{ zig_align\(16\) uint64_t lo; uint64_t hi; \} zig_u128;/typedef struct { zig_align(8) uint64_t lo; uint64_t hi; } zig_u128;/g; + \\s/typedef struct \{ zig_align\(16\) uint64_t lo; int64_t hi; \} zig_i128;/typedef struct { zig_align(8) uint64_t lo; int64_t hi; } zig_i128;/g; + \\s/typedef struct \{ zig_align\(16\) uint64_t hi; uint64_t lo; \} zig_u128;/typedef struct { zig_align(8) uint64_t hi; uint64_t lo; } zig_u128;/g; + \\s/typedef struct \{ zig_align\(16\) int64_t hi; uint64_t lo; \} zig_i128;/typedef struct { zig_align(8) int64_t hi; uint64_t lo; } zig_i128;/g; + \\s/#if defined\(zig_darwin\) \|\| defined\(zig_aarch64\)/#if defined(zig_darwin) || defined(zig_aarch64) || defined(zig_arm)/; + \\s/typedef __attribute__\(\(__vector_size__\(2 \* sizeof\(uint64_t\)\)\)\) uint64_t zig_v2u64;/typedef __attribute__((__vector_size__(2 * sizeof(uint64_t)), aligned(16))) uint64_t zig_v2u64;/; + }); + patch.addFileArg(.{ .cwd_relative = zig_h_src }); + const patched_zig_h = patch.captureStdOut(.{ .basename = "zig.h" }); + + const include_wf = b.addWriteFiles(); + _ = include_wf.addCopyFile(patched_zig_h, "zig.h"); + + const shim_wf = b.addWriteFiles(); + const exception_shim = shim_wf.add("aether_3ds_exception.c", + \\#include <3ds.h> + \\ + \\extern void aether3dsExceptionHandler(ERRF_ExceptionInfo *excep, CpuRegisters *regs); + \\ + \\void aether3dsInstallExceptionHandler(void *stack_top) { + \\ threadOnException(aether3dsExceptionHandler, stack_top, WRITE_DATA_TO_HANDLER_STACK); + \\} + \\ + ); + + // Small linker 
script fragment providing accurate .text bounds as + // link-time constants. This lets the panic unwinder in services.zig + // use real section start/end (via ADDR/SIZEOF) instead of any + // hardcoded/sketchy ranges for isLikelyReturnAddress text checks. + const syms_wf = b.addWriteFiles(); + const text_syms_ld = syms_wf.add("aether_3ds_text_syms.ld", + \\/* Zig C backend (for 3DS ofmt=c) mangles extern names with zig_e_ prefix. */ + \\/* Provide both for the isLikelyReturnAddress range checks + any debug. */ + \\zig_e___text_start = ADDR(.text); + \\zig_e___text_end = ADDR(.text) + SIZEOF(.text); + \\__text_start = zig_e___text_start; + \\__text_end = zig_e___text_end; + ); + + // Standard 3DS arch flags from devkitPro's template Makefile. + const arch = [_][]const u8{ + "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", + // Keep frame pointers so manual r11-based stack walk in panic handler + // can produce useful unwind (otherwise gcc -O* uses r11 as temp and + // chains are absent or clobbered, leading to data aborts in the walker). + "-fno-omit-frame-pointer", + }; + + // Single-shot compile + link via the gcc driver. 3dsx.specs pulls + // in `_3dsx_crt0` (which calls our exported `main`) and the 3DSX + // linker script. We also supply a tiny -T fragment for accurate + // __text_* symbols (see text_syms_ld above). + const exe_optimize = cBackendOptimizeMode(exe); + + const link = b.addSystemCommand(&.{gcc}); + link.addArgs(&arch); + link.addArgs(&.{ + "-mword-relocations", + "-ffunction-sections", + "-D_FORTIFY_SOURCE=0", + "-D__3DS__", + "-DARM11", + cBackendGccOptimizeArg(exe_optimize), + cBackendGccDebugArg(exe_optimize), + "-specs=3dsx.specs", + "-T", + }); + link.addFileArg(text_syms_ld); + link.addArgs(&.{ + "-Wl,--wrap=threadCreate", + "-Wl,--no-warn-execstack", + }); + link.addArgs(&.{ + // Pin the C standard to C11. 
zig.h picks `[[noreturn]]` under + // C23 but emits it in attribute-list position that gcc rejects; + // C11's `_Noreturn` is what zig's emitter actually targets. + "-std=gnu11", + }); + link.addArgs(&.{ + // zig's -ofmt=c emitter treats `uintptr_t` and `uint32_t` as + // interchangeable on 32-bit ARM (they ARE the same width) but + // gcc 14+ promotes the resulting pointer-type mismatch from a + // warning to an error. Demote it and a couple of related + // chatters; we don't author this C and there's nothing + // actionable in the warnings. + "-fno-strict-aliasing", + "-Wno-incompatible-pointer-types", + "-Wno-int-conversion", + "-Wno-builtin-declaration-mismatch", + }); + link.addArg(b.fmt("-I{s}", .{ctru_inc})); + link.addPrefixedDirectoryArg("-I", include_wf.getDirectory()); + link.addArg("-x"); + link.addArg("c"); + link.addFileArg(patch3dsGeneratedC(b, exe)); + link.addFileArg(exception_shim); + // Reset language so gcc treats subsequent inputs by extension; the + // compiler_rt object is ELF arm and `-x c` would mis-parse it. + link.addArg("-x"); + link.addArg("none"); + link.addFileArg(crt_clean); + link.addArg(b.fmt("-L{s}", .{ctru_lib})); + link.addArgs(&.{ "-lctru", "-lm" }); + link.addArg("-o"); + const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); + + // SMDH metadata (HOME-menu name, description, author, icon). + const smdh_run = b.addSystemCommand(&.{ tool_smdh, "--create" }); + smdh_run.addArg(if (opts.title.len > 0) opts.title else exe.name); + smdh_run.addArg(if (opts.smdh_long_description.len > 0) + opts.smdh_long_description + else + "Built with Aether"); + smdh_run.addArg(opts.smdh_author); + if (opts.smdh_icon) |icon| + smdh_run.addFileArg(icon) + else + smdh_run.addArg(default_icon); + const smdh = smdh_run.addOutputFileArg(b.fmt("{s}.smdh", .{exe.name})); + + // ELF -> 3DSX. The smdh and (optional) romfs ride in via the + // `--smdh=` / `--romfs=` flag-form args. 
+ const pack = b.addSystemCommand(&.{tool_3dsx}); + pack.addFileArg(elf); + const threedsx = pack.addOutputFileArg(b.fmt("{s}.3dsx", .{exe.name})); + pack.addPrefixedFileArg("--smdh=", smdh); + if (opts.romfs) |r| pack.addPrefixedDirectoryArg("--romfs=", r); + + if (opts.output_dir) |dir| { + const alloc = b.allocator; + b.getInstallStep().dependOn(&b.addInstallBinFile( + threedsx, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".3dsx" }) catch @panic("OOM"), + ).step); + b.getInstallStep().dependOn(&b.addInstallBinFile( + elf, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".elf" }) catch @panic("OOM"), + ).step); + } else { + b.getInstallStep().dependOn(&b.addInstallBinFile( + threedsx, + b.fmt("{s}.3dsx", .{exe.name}), + ).step); + } +} + +/// Compiles the zig-emitted C with devkitA64, links against libnx, and +/// packages the ELF (plus a NACP and optional RomFS) into a `.nro` +/// homebrew bundle. Mirrors `threedsxPipeline` for the Switch toolchain. +fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOptions) void { + // aarch64 GCC supports __int128 natively, so we don't need the + // `zig.h` align(16) -> align(8) patch the 3DS pipeline applies. + // + // We do still need a compiler_rt object: zig.h calls helpers like + // `__floatunsisf` / `__floatundidf` / `__floatdisf` unconditionally, + // but devkitA64's libgcc doesn't ship them — gcc on aarch64 with + // hardware FP inlines these casts as `ucvtf`/`scvtf`, so the + // helpers are dead code in normal compilations. Zig's emitted C + // takes the slow path, so we drop in zig's own compiler_rt to + // satisfy the references. Like the 3DS pipeline we localize + // symbols that overlap newlib (memset/memcpy/...) so the linker + // pulls newlib's strong implementations. 
+ const game_target = exe.root_module.resolved_target.?; + var crt_query = game_target.query; + crt_query.os_tag = .freestanding; + crt_query.ofmt = null; + crt_query.cpu_model = .{ .explicit = game_target.result.cpu.model }; + const crt_target = b.resolveTargetQuery(crt_query); + + const compiler_rt_path = b.pathJoin(&.{ + b.graph.zig_lib_directory.path orelse ".", + "compiler_rt.zig", + }); + const crt_obj = b.addObject(.{ + .name = "aether_switch_compiler_rt", + .root_module = b.createModule(.{ + .root_source_file = .{ .cwd_relative = compiler_rt_path }, + .target = crt_target, + .optimize = .ReleaseSmall, + .strip = true, + // Switch homebrew uses libnx's switch.specs which links + // with `-z text`. PIC is mandatory for any object that + // ends up in the read-only .text segment, otherwise the + // linker rejects the dynamic absolute relocations. + .pic = true, + }), + }); + + const dkp = devkitProPath(b); + + const strip_libc = b.addSystemCommand(&.{ + b.pathJoin(&.{ dkp, "devkitA64/bin/aarch64-none-elf-objcopy" }), + "--localize-symbol=memset", + "--localize-symbol=memcpy", + "--localize-symbol=memmove", + "--localize-symbol=memcmp", + "--localize-symbol=strlen", + "--localize-symbol=bcmp", + }); + strip_libc.addArtifactArg(crt_obj); + const crt_clean = strip_libc.addOutputFileArg("aether_switch_compiler_rt.o"); + const gcc = b.pathJoin(&.{ dkp, "devkitA64/bin/aarch64-none-elf-gcc" }); + const tool_elf2nro = b.pathJoin(&.{ dkp, "tools/bin/elf2nro" }); + const tool_nacp = b.pathJoin(&.{ dkp, "tools/bin/nacptool" }); + const libnx_inc = b.pathJoin(&.{ dkp, "libnx/include" }); + const libnx_lib = b.pathJoin(&.{ dkp, "libnx/lib" }); + const libnx_specs = b.pathJoin(&.{ dkp, "libnx/switch.specs" }); + const default_icon = b.pathJoin(&.{ dkp, "libnx/default_icon.jpg" }); + + const syms_wf = b.addWriteFiles(); + const text_syms_ld = syms_wf.add("aether_switch_text_syms.ld", + \\/* Zig C backend (for Switch ofmt=c) mangles extern names with zig_e_ prefix. 
*/ + \\zig_e___text_start = ADDR(.text); + \\zig_e___text_end = ADDR(.text) + SIZEOF(.text); + \\__text_start = zig_e___text_start; + \\__text_end = zig_e___text_end; + ); + + // Standard Switch arch flags from devkitPro's switch_rules / + // example Makefiles. `-mtp=soft` matches what libnx is built + // against; mismatching the TLS access mode crashes on the first + // thread-local read. + const arch = [_][]const u8{ + "-march=armv8-a+crc+crypto", "-mtune=cortex-a57", "-mtp=soft", "-fPIE", "-fno-omit-frame-pointer", + }; + + const exe_optimize = cBackendOptimizeMode(exe); + + const link = b.addSystemCommand(&.{gcc}); + link.addArgs(&arch); + link.addArgs(&.{ + "-ffunction-sections", + "-fdata-sections", + "-D_FORTIFY_SOURCE=0", + "-D__SWITCH__", + cBackendGccOptimizeArg(exe_optimize), + cBackendGccDebugArg(exe_optimize), + b.fmt("-specs={s}", .{libnx_specs}), + "-T", + // Pin the C standard to C11 (zig.h targets `_Noreturn`, not + // C23's `[[noreturn]]`). + }); + link.addFileArg(text_syms_ld); + link.addArgs(&.{ + "-std=gnu11", + // zig's -ofmt=c emitter has known pointer/int-conversion + // mismatches that gcc 14+ promotes to errors. We don't author + // the C, so demote them. + "-fno-strict-aliasing", + "-Wno-incompatible-pointer-types", + "-Wno-int-conversion", + "-Wno-builtin-declaration-mismatch", + }); + link.addArg(b.fmt("-I{s}", .{libnx_inc})); + // zig's emitted C `#include "zig.h"`. The header lives in zig's + // own lib directory; point gcc at it. aarch64 GCC's __int128 + // alignment matches zig's, so no patching is needed (unlike 3DS). 
+ link.addArg(b.fmt("-I{s}", .{b.graph.zig_lib_directory.path orelse "."})); + link.addArg("-x"); + link.addArg("c"); + link.addArtifactArg(exe); + link.addArg("-x"); + link.addArg("none"); + link.addFileArg(crt_clean); + link.addArg(b.fmt("-L{s}", .{libnx_lib})); + link.addArgs(&.{ "-ldeko3d", "-lnx", "-lm" }); + link.addArg("-o"); + const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); + + // NACP metadata (HOME-menu title, author, version). + const nacp_run = b.addSystemCommand(&.{ tool_nacp, "--create" }); + nacp_run.addArg(if (opts.title.len > 0) opts.title else exe.name); + nacp_run.addArg(if (opts.switch_author.len > 0) opts.switch_author else "Aether"); + nacp_run.addArg(if (opts.switch_version.len > 0) opts.switch_version else "1.0.0"); + const nacp = nacp_run.addOutputFileArg(b.fmt("{s}.nacp", .{exe.name})); + + // ELF -> NRO. The icon, NACP, and (optional) romfs ride in via + // flag-form args. + const pack = b.addSystemCommand(&.{tool_elf2nro}); + pack.addFileArg(elf); + const nro = pack.addOutputFileArg(b.fmt("{s}.nro", .{exe.name})); + if (opts.switch_icon) |icon| + pack.addPrefixedFileArg("--icon=", icon) + else + pack.addArg(b.fmt("--icon={s}", .{default_icon})); + pack.addPrefixedFileArg("--nacp=", nacp); + if (opts.switch_romfs) |r| pack.addPrefixedDirectoryArg("--romfsdir=", r); + + if (opts.output_dir) |dir| { + const alloc = b.allocator; + b.getInstallStep().dependOn(&b.addInstallBinFile( + nro, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".nro" }) catch @panic("OOM"), + ).step); + b.getInstallStep().dependOn(&b.addInstallBinFile( + elf, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".elf" }) catch @panic("OOM"), + ).step); + } else { + b.getInstallStep().dependOn(&b.addInstallBinFile( + nro, + b.fmt("{s}.nro", .{exe.name}), + ).step); } } // --- Aether's own build (test app + engine tests) --- +fn makeResourceManifest(b: *std.Build, resource_dir_path: []const u8) []const u8 { + const io = b.graph.io; + const 
full_resource_dir_path = b.pathFromRoot(resource_dir_path); + var dir = std.Io.Dir.cwd().openDir(io, full_resource_dir_path, .{ .iterate = true }) catch |err| { + std.debug.panic("unable to open web resource directory '{s}': {s}", .{ resource_dir_path, @errorName(err) }); + }; + defer dir.close(io); + + var walker = dir.walk(b.allocator) catch @panic("OOM"); + defer walker.deinit(); + + var manifest: std.ArrayList(u8) = .empty; + while (walker.next(io) catch |err| { + std.debug.panic("unable to walk web resource directory '{s}': {s}", .{ resource_dir_path, @errorName(err) }); + }) |entry| { + if (entry.kind != .file) continue; + if (std.mem.eql(u8, entry.path, "resources.manifest")) continue; + manifest.appendSlice(b.allocator, entry.path) catch @panic("OOM"); + manifest.append(b.allocator, '\n') catch @panic("OOM"); + } + return manifest.toOwnedSlice(b.allocator) catch @panic("OOM"); +} + pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + const threeds_heap_mib = b.option(u32, "3ds-heap-mib", "3DS: regular libctru/newlib heap size in MiB (default: 4)"); + const threeds_linear_heap_mib = b.option(u32, "3ds-linear-heap-mib", "3DS: linear heap size in MiB (default: 60)"); + const web_resources_path = b.option([]const u8, "web-resources", "WASM/browser: directory to copy into zig-out/web and preload via resources.manifest (default: test)") orelse "test"; + const web_host = b.option([]const u8, "web-host", "serve-web: bind host (default: 127.0.0.1)") orelse "127.0.0.1"; + const web_port = b.option(u16, "web-port", "serve-web: bind port (default: 8080)") orelse 8080; const overrides: Config.Overrides = .{ .gfx = b.option(Gfx, "gfx", "Graphics backend override (default: auto-detect from target)"), - .audio = b.option(Audio, "audio", "Audio backend override (default: .none on macOS, .default elsewhere)"), + .audio = b.option(Audio, "audio", "Audio backend override (default: platform default)"), 
.psp_display_mode = b.option(PspDisplayMode, "psp-display", "PSP display mode: rgba8888 (32-bit, default) or rgb565 (16-bit)"), .psp_mipmaps = b.option(bool, "psp-mipmaps", "PSP: generate mip levels for VRAM-resident textures (default: false)"), + .nintendo_3ds_heap_size = if (threeds_heap_mib) |mib| mib * 1024 * 1024 else null, + .nintendo_3ds_linear_heap_size = if (threeds_linear_heap_mib) |mib| mib * 1024 * 1024 else null, .use_cwd = b.option(bool, "use-cwd", "Force resources+data dirs to CWD (debug/CI convenience; default: false)"), + .flush_logs = b.option(bool, "flush-logs", "Flush aether.log after every log message (debugging hard hangs; default: false)"), + .nintendo_switch = b.option(bool, "nintendo-switch", "Build for Nintendo Switch (requires -Dtarget=aarch64-freestanding-none and devkitA64/libnx)"), }; const config = Config.resolve(target, overrides); @@ -739,26 +1686,107 @@ pub fn build(b: *std.Build) void { .overrides = overrides, }); - addShader(b, b, exe, config, "basic", .{ - .slang = b.path("test/shaders/basic.slang"), - }); + const nintendo_romfs = b.addWriteFiles(); + _ = nintendo_romfs.addCopyFile(b.path("test/test.png"), "test.png"); + _ = nintendo_romfs.addCopyFile(b.path("test/calm1.wav"), "calm1.wav"); + _ = nintendo_romfs.addCopyFile(b.path("test/grass1.wav"), "grass1.wav"); exportArtifact(b, b, exe, config, .{ .title = "Aether", - .output_dir = "Aether-PSP", + .output_dir = switch (config.platform) { + .psp => "Aether-PSP", + .nintendo_3ds => "Aether-3DS", + .nintendo_switch => "Aether-Switch", + else => null, + }, + .smdh_long_description = "Aether engine test app", + .smdh_author = "Aether", + .resources = &.{ + .{ .path = b.path("test/test.png"), .name = "test.png" }, + .{ .path = b.path("test/calm1.wav"), .name = "calm1.wav" }, + .{ .path = b.path("test/grass1.wav"), .name = "grass1.wav" }, + }, + .romfs = if (config.platform == .nintendo_3ds) nintendo_romfs.getDirectory() else null, + .switch_romfs = if (config.platform == 
.nintendo_switch) nintendo_romfs.getDirectory() else null, }); + const web_target = webTarget(b); + const web_overrides: Config.Overrides = .{ + .gfx = .webgl, + .use_cwd = true, + }; + const web_exe = addGame(b, b, .{ + .name = "Aether", + .root_source_file = b.path("test/web_main.zig"), + .target = web_target, + .optimize = optimize, + .overrides = web_overrides, + }); + const web_install = addWebBundle(b, b, web_exe, .{ + .web_resources = b.path(web_resources_path), + .web_resource_manifest = makeResourceManifest(b, web_resources_path), + }); + + const web_step = b.step("web", "Build the browser-playable WASM site in zig-out/web"); + web_step.dependOn(&web_install.step); + + const serve_web_cmd = addServeWebStep(b, b, "aether-serve-web", web_install, web_host, web_port); + + const serve_web_step = b.step("serve-web", "Serve zig-out/web with WASM MIME and COOP/COEP headers"); + serve_web_step.dependOn(&serve_web_cmd.step); + const run_step = b.step("run", "Run the app"); - const run_cmd = b.addRunArtifact(exe); - run_step.dependOn(&run_cmd.step); + if (config.platform == .nintendo_3ds) { + // 3DS can't run natively on the host. The 3DS-side homebrew + // launcher listens for incoming .3dsx pushes on port 17491; + // `3dslink` finds it via mDNS or accepts an explicit IP. + const link_cmd = add3dslink(b, b.getInstallPath(.bin, "Aether-3DS/Aether.3dsx")); + + const link_step = b.step("3dslink", "Push the 3dsx to a networked 3DS via 3dslink"); + link_step.dependOn(&link_cmd.step); + + // `zig build run` aliases to 3dslink for 3DS so the same + // command works across host/PSP/3DS workflows. + run_step.dependOn(&link_cmd.step); + } else if (config.platform == .nintendo_switch) { + // Switch can't run natively on the host. nxlink pushes the + // .nro to nx-hbloader on a networked Switch (mDNS by default, + // explicit IP via -a). 
+ const dkp = devkitProPath(b); + const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/nxlink" })}); + if (b.option([]const u8, "nxlink-address", "Switch: target IP for nxlink push (default: mDNS auto-discover)")) |ip| { + link_cmd.addArgs(&.{ "-a", ip }); + } + if (b.option(u32, "nxlink-retries", "Switch: nxlink retry count (default: 10)")) |n| { + link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); + } + if (b.option(bool, "nxlink-server", "Switch: pass -s so nxlink stays listening after upload (relays stdout/stderr from nro)") orelse false) { + link_cmd.addArg("-s"); + } + link_cmd.addArg(b.getInstallPath(.bin, "Aether-Switch/Aether.nro")); + link_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + // nxlink takes nro args after a `--args` separator. + link_cmd.addArg("--args"); + link_cmd.addArgs(args); + } + + const link_step = b.step("nxlink", "Push the nro to a networked Switch via nxlink"); + link_step.dependOn(&link_cmd.step); - run_cmd.step.dependOn(b.getInstallStep()); - if (b.args) |args| { - run_cmd.addArgs(args); + // `zig build run` aliases to nxlink for Switch so the same + // command works across host/PSP/3DS/Switch workflows. 
+ run_step.dependOn(&link_cmd.step); + } else { + const run_cmd = b.addRunArtifact(exe); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| run_cmd.addArgs(args); + run_step.dependOn(&run_cmd.step); } - // Engine unit tests (desktop only) - if (config.platform != .psp) { + // Engine unit tests (desktop only — PSP/3DS/Switch pull in symbols + // that can't be linked or analyzed under the test runner) + if (config.platform != .psp and config.platform != .nintendo_3ds and config.platform != .nintendo_switch) { const mod_tests = b.addTest(.{ .root_module = exe.root_module.import_table.get("aether").?, }); diff --git a/build.zig.zon b/build.zig.zon index c6b279c..5c1b89c 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -76,15 +76,20 @@ .url = "git+https://github.com/Snektron/vulkan-zig#3adbeefbc833c12656791d304b37a6315b357745", .hash = "vulkan-0.0.0-r7YtxyBsAwD35FvkFhT2xruTNdCByD3TtGw44XWtfun8", }, - .zaudio = .{ - .url = "git+https://github.com/IridescentRose/zaudio?ref=update_zig_16#54e45e002005448623bff0ba06baa919bd036e03", - .hash = "zaudio-0.11.0-dev-_M-91l8yQQDTAb3Hk8V-lyP88DGgLzS9QNUkmpbCN_PK", - }, .system_sdk = .{ .url = "https://github.com/zig-gamedev/system_sdk/archive/c0dbf11cdc17da5904ea8a17eadc54dee26567ec.tar.gz", .hash = "system_sdk-0.3.0-dev-alwUNnYaaAJAtIdE2fg4NQfDqEKs7QCXy_qYukAOBfmF", .lazy = true, }, + .sdl3 = .{ + .url = "git+https://codeberg.org/7Games/zig-sdl3?ref=master#40c2e4b579aa556db37a502c936426aa1c8b5c95", + .hash = "sdl3-0.2.0-NmT1Q0mFJwBi9kZmArzh2rfJ_mFshydV0zPGULVlpACc", + .lazy = true, + }, + .zsflt = .{ + .url = "git+https://github.com/GasInfinity/zsflt.git#1929cbae9d41c7e1178e494e42aa7b349a68cd8a", + .hash = "zsflt-0.0.1-_pCibQ4wAABnfZskVOk2Y14F056VNJtuqVpAEzUNBGfl", + }, }, .paths = .{ "build.zig", diff --git a/src/audio/audio.zig b/src/audio/audio.zig index 98cd9c2..8892733 100644 --- a/src/audio/audio.zig +++ b/src/audio/audio.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Vec3 = 
@import("../math/math.zig").Vec3; const platform_audio = @import("../platform/audio.zig"); +const options = @import("options"); // -- types ------------------------------------------------------------------- @@ -13,6 +14,7 @@ pub const mixer_mod = @import("mixer.zig"); pub const SoundHandle = mixer_mod.SoundHandle; pub const PlayOptions = mixer_mod.PlayOptions; pub const Priority = mixer_mod.Priority; +pub const enabled = options.config.audio != .none; // -- forwarding to the instantiated mixer ------------------------------------ diff --git a/src/core/input/input.zig b/src/core/input/input.zig index 9d23303..978655a 100644 --- a/src/core/input/input.zig +++ b/src/core/input/input.zig @@ -277,7 +277,7 @@ pub fn register_action_set(name: []const u8) !ActionSetHandle { pub fn add_action(set: ActionSetHandle, name: []const u8, kind: ActionKind) !void { const s = set_ptr(set) orelse return error.UnknownActionSet; - if (s.actions.contains(name)) return error.ActionAlreadyExists; + if (action_ptr(s, name) != null) return error.ActionAlreadyExists; try s.actions.put(alloc, name, .{ .kind = kind, .bindings = .empty, @@ -288,7 +288,7 @@ pub fn add_action(set: ActionSetHandle, name: []const u8, kind: ActionKind) !voi pub fn bind_action(set: ActionSetHandle, action_name: []const u8, b: Binding) !void { const s = set_ptr(set) orelse return error.UnknownActionSet; - const a = s.actions.getPtr(action_name) orelse return error.ActionNotFound; + const a = action_ptr(s, action_name) orelse return error.ActionNotFound; if (a.kind == .vector2 and b.component == .none) return error.Vector2BindingNeedsComponent; try a.bindings.append(alloc, b); } @@ -310,7 +310,7 @@ pub fn get_action(name: []const u8) ?ActionValue { const top = stack.top() orelse return null; const s = set_ptr(top.actions) orelse return null; if (!s.installed) return null; - const a = s.actions.getPtr(name) orelse return null; + const a = action_ptr(s, name) orelse return null; return a.current_value; } @@ -479,6 
+479,16 @@ fn set_ptr_or_null(handle: ActionSetHandle) ?*ActionSet { return set_ptr(handle); } +fn action_ptr(set: *ActionSet, name: []const u8) ?*Action { + if (set.actions.getPtr(name)) |action| return action; + + var it = set.actions.iterator(); + while (it.next()) |entry| { + if (std.mem.eql(u8, entry.key_ptr.*, name)) return entry.value_ptr; + } + return null; +} + fn route_text_to_session(text: []const u8) void { const top = stack.top() orelse return; if (!top.consumes_text) return; diff --git a/src/core/paths.zig b/src/core/paths.zig index 79c488d..96538a1 100644 --- a/src/core/paths.zig +++ b/src/core/paths.zig @@ -24,10 +24,13 @@ //! per project style guide they go through `std.Io` / `std.process`. const std = @import("std"); -const builtin = @import("builtin"); const options = @import("options"); const Io = std.Io; +const NintendoIo = if (options.config.platform == .nintendo_3ds or options.config.platform == .nintendo_switch) + @import("../platform/c_io.zig") +else + void; /// Engine-owned directory handles. Cleared via `close()` at engine shutdown. pub const Dirs = struct { @@ -48,6 +51,8 @@ pub const Dirs = struct { if (self.resources.handle != cwd_handle) self.resources.close(io); if (self.data.handle != cwd_handle and self.data.handle != self.resources.handle) self.data.close(io); + + if (NintendoIo != void) NintendoIo.deinitAppDirs(); } }; @@ -82,13 +87,15 @@ pub fn resolve( // and debug/CI builds where state co-located with the binary is a // feature, not a bug. if (options.config.use_cwd) { + if (NintendoIo != void) NintendoIo.useCwdDirs(); return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; } - return switch (builtin.os.tag) { + return switch (options.config.platform) { .macos => resolve_macos(io, environ_map, app_name), .windows => resolve_windows(io, environ_map, app_name), .linux => resolve_linux(io, environ_map, app_name), + .nintendo_3ds, .nintendo_switch => resolve_nintendo(io, app_name), // PSP: both dirs collapse to CWD. 
The EBOOT and its siblings all // live under `ms0:/PSP/GAME//`; the runtime sets CWD there // before main. No separation to enforce. @@ -100,6 +107,23 @@ pub fn resolve( }; } +fn resolve_nintendo(io: Io, app_name: []const u8) Error!Dirs { + NintendoIo.mountData(); + errdefer NintendoIo.deinitAppDirs(); + + var data_buf: [Io.Dir.max_path_bytes]u8 = undefined; + const data_path = NintendoIo.dataRoot(&data_buf, app_name) catch return error.PathTooLong; + const data = try Io.Dir.cwd().createDirPathOpen(io, data_path, .{ .open_options = .{ .iterate = true } }); + errdefer data.close(io); + + const resources = if (NintendoIo.mountResources()) + Io.Dir.cwd().openDir(io, "romfs:/", .{}) catch data + else + data; + + return .{ .resources = resources, .data = data }; +} + // -- macOS -------------------------------------------------------------------- fn resolve_macos( diff --git a/src/engine.zig b/src/engine.zig index c94645c..fc4a22b 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const assert = std.debug.assert; const Util = @import("util/util.zig"); @@ -87,8 +86,30 @@ pub const Engine = struct { trackers: [TRACKER_COUNT]CategoryTracker, running: bool, vsync: bool, - state: *const Core.State, + state: Core.State, dirs: Core.paths.Dirs, + debug_trace_loops: u8, + debug_trace_loop_index: u32, + run_loop: RunLoop, + + const RunLoop = struct { + run_start_ns: i96 = 0, + last_us: i64 = 0, + update_accum: i64 = 0, + tick_accum: i64 = 0, + fps_count: u32 = 0, + fps_window_end: i64 = std.time.us_per_s, + initialized: bool = false, + + fn reset(self: *RunLoop, io: std.Io) void { + var clock = std.Io.Clock.boot; + self.* = .{ + .run_start_ns = clock.now(io).toNanoseconds(), + .fps_window_end = std.time.us_per_s, + .initialized = true, + }; + } + }; pub const Config = struct { memory: MemoryConfig, @@ -127,7 +148,10 @@ pub const Engine = struct { self.io = sys_io; self.running = true; self.vsync = 
config.vsync; - self.state = state; + self.state = state.*; + self.debug_trace_loops = 0; + self.debug_trace_loop_index = 0; + self.run_loop = .{}; self.pool = memory.PoolAlloc.init(mem, "main"); const inner = self.pool.allocator(); @@ -148,9 +172,20 @@ pub const Engine = struct { try logger.init(sys_io, self.dirs.data); - try Platform.init(self, config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable); - try Rendering.Texture.init_defaults(self.allocator(.render)); - try Core.state_machine.init(self, state); + Platform.init(self, config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable) catch |err| switch (err) { + error.OutOfMemory => return error.PlatformInitOutOfMemory, + else => return err, + }; + errdefer Platform.deinit(); + + Rendering.Texture.init_defaults(self.allocator(.render)) catch |err| switch (err) { + error.OutOfMemory => return error.DefaultTexturesOutOfMemory, + else => return err, + }; + Core.state_machine.init(self, &self.state) catch |err| switch (err) { + error.OutOfMemory => return error.StateInitOutOfMemory, + else => return err, + }; } pub fn deinit(self: *Engine) void { @@ -174,6 +209,11 @@ pub const Engine = struct { Platform.gfx.set_vsync(v); } + pub fn trace_next_loops(self: *Engine, count: u8) void { + self.debug_trace_loops = count; + self.debug_trace_loop_index = 0; + } + pub fn pool_used(self: *const Engine, p: Pool) usize { return self.trackers[@intFromEnum(p)].used; } @@ -191,158 +231,412 @@ pub const Engine = struct { } pub fn total_used(self: *const Engine) usize { - return self.pool.used; + var total: usize = 0; + for (self.trackers) |tracker| total += tracker.used; + return total; } pub fn total_budget(self: *const Engine) usize { - return self.pool.budget; + var total: usize = 0; + for (self.trackers) |tracker| total += tracker.budget; + return total; } pub fn report(self: *const Engine) void { - const mib = 1024.0 * 1024.0; Util.engine_logger.info("--- 
memory pools ---", .{}); inline for (std.meta.fields(Pool)) |f| { const p: Pool = @enumFromInt(f.value); const used = self.pool_used(p); const budget = self.pool_budget(p); const remaining = self.pool_remaining(p); - Util.engine_logger.info(" {s}: {}/{} bytes ({d:.3}/{d:.3} MiB, {} remaining)", .{ + Util.engine_logger.info(" {s}: {}/{} bytes ({}/{} KiB, {} remaining)", .{ f.name, used, budget, - @as(f64, @floatFromInt(used)) / mib, - @as(f64, @floatFromInt(budget)) / mib, + used / 1024, + budget / 1024, remaining, }); } - Util.engine_logger.info(" total: {}/{} bytes ({d:.3}/{d:.3} MiB)", .{ - self.pool.used, - self.pool.budget, - @as(f64, @floatFromInt(self.pool.used)) / mib, - @as(f64, @floatFromInt(self.pool.budget)) / mib, + Util.engine_logger.info(" total: {}/{} bytes ({}/{} KiB)", .{ + self.total_used(), + self.total_budget(), + self.total_used() / 1024, + self.total_budget() / 1024, }); Util.engine_logger.info("--------------------", .{}); } + pub fn beginRun(self: *Engine) void { + self.run_loop.reset(self.io); + } + + pub fn stepFrame(self: *Engine) !bool { + if (!self.run_loop.initialized) self.beginRun(); + if (options.config.platform == .nintendo_3ds) { + @compileError("Engine.stepFrame is not implemented for the Nintendo 3DS fixed-vblank loop"); + } + try self.stepFrameInternal(false); + return self.running; + } + pub fn run(self: *Engine) !void { + if (options.config.platform == .nintendo_3ds) { + return self.runNintendo3ds(); + } + + self.beginRun(); + while (self.running) { + try self.stepFrameInternal(true); + } + } + + fn stepFrameInternal(self: *Engine, allow_sleep: bool) !void { + if (options.config.platform == .nintendo_3ds) unreachable; + const US_PER_S: u64 = std.time.us_per_s; const NS_PER_US: i64 = 1000; - // Fixed-step rates -- PSP targets 60 Hz display + // Fixed-step rates -- handheld backends target 60 Hz displays. 
const UPDATES_HZ: u32 = if (options.config.platform == .psp) 60 else 144; const TICKS_HZ: u32 = 20; const UPDATE_US: u64 = US_PER_S / UPDATES_HZ; const TICK_US: u64 = US_PER_S / TICKS_HZ; - const update_budget_ns: i64 = @as(i64, @intCast(UPDATE_US)) * NS_PER_US; - var clock = std.Io.Clock.real; + var clock = std.Io.Clock.boot; + const fps_window_us: i64 = @intCast(US_PER_S); + + const report_fps = options.config.gfx != .headless and !options.config.flush_logs; + + const trace_loop = self.debug_trace_loops > 0; + const trace_loop_index = self.debug_trace_loop_index + 1; + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} begin update_us={d} tick_us={d}", .{ + trace_loop_index, + UPDATE_US, + TICK_US, + }); + } + + var now_us = elapsedUsSince(self.run_loop.run_start_ns, clock.now(self.io).toNanoseconds()); + var frame_dt_us = saturatingSubI64(now_us, self.run_loop.last_us); + + if (frame_dt_us <= 0) { + if (allow_sleep) { + try std.Io.sleep(self.io, .fromNanoseconds(std.time.ns_per_ms), clock); + } + now_us = elapsedUsSince(self.run_loop.run_start_ns, clock.now(self.io).toNanoseconds()); + frame_dt_us = @max(0, saturatingSubI64(now_us, self.run_loop.last_us)); + if (frame_dt_us <= 0) { + frame_dt_us = 1000; + } + } + + if (frame_dt_us > 500_000) frame_dt_us = 500_000; + self.run_loop.last_us = now_us; + + self.run_loop.update_accum = saturatingAddI64(self.run_loop.update_accum, frame_dt_us); + self.run_loop.tick_accum = saturatingAddI64(self.run_loop.tick_accum, frame_dt_us); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} time now_us={d} frame_dt_us={d} last_us={d} update_accum={d} tick_accum={d}", .{ + trace_loop_index, + now_us, + frame_dt_us, + self.run_loop.last_us, + self.run_loop.update_accum, + self.run_loop.tick_accum, + }); + Util.engine_logger.info("trace: engine loop {d} platform begin", .{trace_loop_index}); + } + + const platform_start_ns = clock.now(self.io).toNanoseconds(); + Platform.update(self); + const 
platform_done_ns = clock.now(self.io).toNanoseconds(); + var pre_update_elapsed_ns = elapsedNsBetween(platform_start_ns, platform_done_ns); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} platform end running={}", .{ trace_loop_index, self.running }); + } + if (!self.running) return; + + // ---- fixed-rate TICK steps (e.g., 20 Hz logic) ---- + var is_tick_frame = false; + var tick_cost_ns: i64 = 0; + const tick_us: i64 = @intCast(TICK_US); + var tick_steps: u32 = 0; + while (self.run_loop.tick_accum >= tick_us) { + @branchHint(.unpredictable); + is_tick_frame = true; + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} tick {d} begin accum={d}", .{ + trace_loop_index, + tick_steps + 1, + self.run_loop.tick_accum, + }); + } + const tick_start_ns = clock.now(self.io).toNanoseconds(); + try Core.state_machine.tick(self); + const tick_end_ns = clock.now(self.io).toNanoseconds(); + tick_cost_ns = saturatingAddI64(tick_cost_ns, elapsedNsBetween(tick_start_ns, tick_end_ns)); + self.run_loop.tick_accum -= tick_us; + tick_steps += 1; + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} tick {d} end accum={d}", .{ + trace_loop_index, + tick_steps, + self.run_loop.tick_accum, + }); + } + } + + // ---- fixed-rate UPDATE steps (simulation & interpolation) ---- + const UPDATE_DT_S: f32 = @as(f32, @floatFromInt(UPDATE_US)) / @as(f32, US_PER_S); + var update_steps: u32 = 0; + while (self.run_loop.update_accum >= UPDATE_US) { + @branchHint(.unpredictable); + + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} input begin update_accum={d}", .{ + trace_loop_index, + self.run_loop.update_accum, + }); + } + const input_start_ns = clock.now(self.io).toNanoseconds(); + Platform.input.update(); + Core.input.update(); + const input_done_ns = clock.now(self.io).toNanoseconds(); + const engine_elapsed_ns = saturatingAddI64(pre_update_elapsed_ns, elapsedNsBetween(input_start_ns, input_done_ns)); + if (trace_loop) { + 
Util.engine_logger.info("trace: engine loop {d} input end running={}", .{ trace_loop_index, self.running }); + } + if (!self.running) return; + + const budget = Util.BudgetContext{ + .phase_budget_ns = update_budget_ns, + .engine_elapsed_ns = engine_elapsed_ns, + .remaining_ns = update_budget_ns - engine_elapsed_ns, + .is_tick_frame = is_tick_frame, + .tick_cost_ns = tick_cost_ns, + .safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, + }; + + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} update {d} begin dt_bits=0x{x}", .{ + trace_loop_index, + update_steps + 1, + @as(u32, @bitCast(UPDATE_DT_S)), + }); + } + try Core.state_machine.update(self, UPDATE_DT_S, &budget); + pre_update_elapsed_ns = 0; + self.run_loop.update_accum -= UPDATE_US; + update_steps += 1; + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} update {d} end accum={d}", .{ + trace_loop_index, + update_steps, + self.run_loop.update_accum, + }); + } + } + + // ---- render ASAP (uncapped when vsync == false) ---- + const frame_dt_s: f32 = @as(f32, @floatFromInt(frame_dt_us)) / @as(f32, US_PER_S); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} start_frame begin frame_dt_us={d}", .{ + trace_loop_index, + frame_dt_us, + }); + } + const drew_frame = Platform.gfx.api.start_frame(); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} start_frame end drew={}", .{ trace_loop_index, drew_frame }); + } + if (drew_frame) { + const draw_start_ns = clock.now(self.io).toNanoseconds(); + // Time until next update step is due + const slack_us: i64 = @as(i64, @intCast(UPDATE_US)) - @max(0, self.run_loop.update_accum); + const draw_budget_ns: i64 = if (self.vsync) + slack_us * NS_PER_US + else + std.math.maxInt(i64); + + const draw_budget = Util.BudgetContext{ + .phase_budget_ns = draw_budget_ns, + .engine_elapsed_ns = 0, + .remaining_ns = draw_budget_ns, + .is_tick_frame = is_tick_frame, + .tick_cost_ns = tick_cost_ns, + 
.safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, + }; + + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} draw begin", .{trace_loop_index}); + } + try Core.state_machine.draw(self, frame_dt_s, &draw_budget); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} draw end", .{trace_loop_index}); + } + _ = draw_start_ns; + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} end_frame begin", .{trace_loop_index}); + } + Platform.gfx.api.end_frame(); + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} end_frame end", .{trace_loop_index}); + } + } else { + @branchHint(.unlikely); + if (allow_sleep) { + if (options.config.gfx == .headless) { + const next_update = @as(i64, @intCast(UPDATE_US)) - self.run_loop.update_accum; + const next_tick = @as(i64, @intCast(TICK_US)) - self.run_loop.tick_accum; + const sleep_us = @max(0, @min(next_update, next_tick)); + if (sleep_us > 0) { + const sleep_ns = sleep_us * NS_PER_US; + try std.Io.sleep(self.io, .fromNanoseconds(@intCast(sleep_ns)), clock); + } + } else if (options.config.platform != .psp) { + try std.Io.sleep(self.io, .fromNanoseconds(50 * std.time.ns_per_ms), clock); + } + } + } + if (trace_loop) { + Util.engine_logger.info("trace: engine loop {d} end ticks={d} updates={d}", .{ + trace_loop_index, + tick_steps, + update_steps, + }); + self.debug_trace_loop_index = trace_loop_index; + self.debug_trace_loops -= 1; + } + + // ---- FPS counting ---- + if (report_fps) { + if (drew_frame) self.run_loop.fps_count += 1; + const end_us = elapsedUsSince(self.run_loop.run_start_ns, clock.now(self.io).toNanoseconds()); + if (end_us >= self.run_loop.fps_window_end) { + Util.engine_logger.info("FPS: {}", .{self.run_loop.fps_count}); + self.run_loop.fps_count = 0; + self.run_loop.fps_window_end = saturatingAddI64(end_us, fps_window_us); + } + } + } + + fn runNintendo3ds(self: *Engine) !void { + const US_PER_S: u64 = std.time.us_per_s; + const NS_PER_US: 
i64 = 1000; + const FRAMES_HZ: u32 = 60; + const TICKS_HZ: u32 = 20; + const TICK_FRAME_INTERVAL: u32 = FRAMES_HZ / TICKS_HZ; + const FRAME_US: u64 = US_PER_S / FRAMES_HZ; + const FRAME_DT_S: f32 = @as(f32, @floatFromInt(FRAME_US)) / @as(f32, US_PER_S); + const frame_budget_ns: i64 = @as(i64, @intCast(FRAME_US)) * NS_PER_US; - var last_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); - var update_accum: i64 = 0; - var tick_accum: i64 = 0; + comptime assert(FRAMES_HZ % TICKS_HZ == 0); - const report_fps = options.config.gfx != .headless; - var fps_count: u32 = 0; - var fps_window_end: i64 = last_us + US_PER_S; + if (!self.vsync) self.set_vsync(true); + var frame_count: u32 = 0; while (self.running) { - const now_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); - var frame_dt_us: i64 = now_us - last_us; - last_us = now_us; - - if (frame_dt_us > 500_000) frame_dt_us = 500_000; - - update_accum += frame_dt_us; - tick_accum += frame_dt_us; - - // ---- fixed-rate TICK steps (e.g., 20 Hz logic) ---- - var is_tick_frame = false; - var tick_cost_ns: i64 = 0; - const tick_us: i64 = @intCast(TICK_US); - while (tick_accum >= tick_us) { - @branchHint(.unpredictable); - is_tick_frame = true; - const tick_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - try Core.state_machine.tick(self); - const tick_end_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - tick_cost_ns += tick_end_ns - tick_start_ns; - tick_accum -= tick_us; + const trace_loop = self.debug_trace_loops > 0; + const trace_loop_index = self.debug_trace_loop_index + 1; + const is_tick_frame = frame_count % TICK_FRAME_INTERVAL == 0; + + if (trace_loop) { + Util.engine_logger.info("trace: 3ds loop {d} begin frame={d} dt_bits=0x{x} tick_frame={}", .{ + trace_loop_index, + frame_count, + @as(u32, @bitCast(FRAME_DT_S)), + is_tick_frame, + }); + Util.engine_logger.info("trace: 3ds loop {d} platform begin", .{trace_loop_index}); } - // ---- fixed-rate 
UPDATE steps (input update & interpolation) ---- - const UPDATE_DT_S: f32 = @as(f32, @floatFromInt(UPDATE_US)) / @as(f32, US_PER_S); - while (update_accum >= UPDATE_US) { - @branchHint(.unpredictable); - - const step_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - Platform.input.update(); - Platform.update(self); - Core.input.update(); - const engine_done_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - const engine_elapsed_ns = engine_done_ns - step_start_ns; - - const budget = Util.BudgetContext{ - .phase_budget_ns = update_budget_ns, - .engine_elapsed_ns = engine_elapsed_ns, - .remaining_ns = update_budget_ns - engine_elapsed_ns, - .is_tick_frame = is_tick_frame, - .tick_cost_ns = tick_cost_ns, - .safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, - }; + Platform.update(self); + if (trace_loop) { + Util.engine_logger.info("trace: 3ds loop {d} platform end running={}", .{ trace_loop_index, self.running }); + } + if (!self.running) break; - try Core.state_machine.update(self, UPDATE_DT_S, &budget); - update_accum -= UPDATE_US; + if (is_tick_frame) { + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} tick begin", .{trace_loop_index}); + try Core.state_machine.tick(self); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} tick end", .{trace_loop_index}); } - // ---- render ASAP (uncapped when vsync == false) ---- - const frame_dt_s: f32 = @as(f32, @floatFromInt(frame_dt_us)) / @as(f32, US_PER_S); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} input begin", .{trace_loop_index}); + Platform.input.update(); + Core.input.update(); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} input end running={}", .{ trace_loop_index, self.running }); + if (!self.running) break; + + const budget = Util.BudgetContext{ + .phase_budget_ns = frame_budget_ns, + .engine_elapsed_ns = 0, + .remaining_ns = frame_budget_ns, + .is_tick_frame = is_tick_frame, + .tick_cost_ns = 0, + .safety_margin_ns = 
Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, + }; + + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} update begin", .{trace_loop_index}); + try Core.state_machine.update(self, FRAME_DT_S, &budget); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} update end", .{trace_loop_index}); + + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} start_frame begin", .{trace_loop_index}); const drew_frame = Platform.gfx.api.start_frame(); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} start_frame end drew={}", .{ trace_loop_index, drew_frame }); if (drew_frame) { - const draw_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - // Time until next update step is due - const slack_us: i64 = @as(i64, @intCast(UPDATE_US)) - @max(0, update_accum); - const draw_budget_ns: i64 = if (self.vsync) - slack_us * NS_PER_US - else - std.math.maxInt(i64); - const draw_budget = Util.BudgetContext{ - .phase_budget_ns = draw_budget_ns, + .phase_budget_ns = frame_budget_ns, .engine_elapsed_ns = 0, - .remaining_ns = draw_budget_ns, + .remaining_ns = frame_budget_ns, .is_tick_frame = is_tick_frame, - .tick_cost_ns = tick_cost_ns, + .tick_cost_ns = 0, .safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, }; - try Core.state_machine.draw(self, frame_dt_s, &draw_budget); - _ = draw_start_ns; + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} draw begin", .{trace_loop_index}); + try Core.state_machine.draw(self, FRAME_DT_S, &draw_budget); + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} draw end", .{trace_loop_index}); + + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} end_frame begin", .{trace_loop_index}); Platform.gfx.api.end_frame(); - } else { - @branchHint(.unlikely); - if (options.config.gfx == .headless) { - const next_update = @as(i64, @intCast(UPDATE_US)) - update_accum; - const next_tick = @as(i64, @intCast(TICK_US)) - tick_accum; - const sleep_us = @max(0, @min(next_update, 
next_tick)); - if (sleep_us > 0) { - try std.Io.sleep(self.io, .fromMicroseconds(sleep_us), clock); - } - } else if (options.config.platform != .psp) { - try std.Io.sleep(self.io, .fromMilliseconds(50), clock); - } + if (trace_loop) Util.engine_logger.info("trace: 3ds loop {d} end_frame end", .{trace_loop_index}); } - // ---- FPS counting ---- - if (report_fps) { - if (drew_frame) fps_count += 1; - const end_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); - if (end_us >= fps_window_end) { - Util.engine_logger.info("FPS: {}", .{fps_count}); - fps_count = 0; - fps_window_end = end_us + US_PER_S; - } + if (trace_loop) { + Util.engine_logger.info("trace: 3ds loop {d} end drew={}", .{ trace_loop_index, drew_frame }); + self.debug_trace_loop_index = trace_loop_index; + self.debug_trace_loops -= 1; } + + frame_count +%= 1; } } }; + +fn elapsedNsBetween(start_ns: i96, end_ns: i96) i64 { + return clampI96ToI64(end_ns - start_ns); +} + +fn elapsedUsSince(start_ns: i96, end_ns: i96) i64 { + return @divTrunc(elapsedNsBetween(start_ns, end_ns), std.time.ns_per_us); +} + +fn saturatingAddI64(a: i64, b: i64) i64 { + return clampI96ToI64(@as(i96, a) + @as(i96, b)); +} + +fn saturatingSubI64(a: i64, b: i64) i64 { + return clampI96ToI64(@as(i96, a) - @as(i96, b)); +} + +fn clampI96ToI64(value: i96) i64 { + const max: i96 = std.math.maxInt(i64); + const min: i96 = std.math.minInt(i64); + if (value > max) return std.math.maxInt(i64); + if (value < min) return std.math.minInt(i64); + return @intCast(value); +} diff --git a/src/platform/3ds/3ds_audio.zig b/src/platform/3ds/3ds_audio.zig new file mode 100644 index 0000000..e61e0e3 --- /dev/null +++ b/src/platform/3ds/3ds_audio.zig @@ -0,0 +1,266 @@ +//! 3DS audio backend -- NDSP hardware voices. +//! +//! Each Aether mixer slot maps to one NDSP channel. The game thread refills +//! double-buffered linear-memory wave buffers from the Stream reader in +//! 
`update`; NDSP handles sample-rate conversion and channel mixing. + +const std = @import("std"); +const surface = @import("surface.zig"); +const Stream = @import("../../audio/stream.zig").Stream; +const PcmFormat = @import("../../audio/stream.zig").PcmFormat; + +const NUM_SLOTS: usize = 24; +const BUFFERS_PER_SLOT: usize = 2; +const SAMPLES_PER_BUF: usize = 4096; +const MAX_CHANNELS: usize = 2; +const MAX_BYTES_PER_SAMPLE: usize = 2; +const MAX_BYTES_PER_BUF: usize = SAMPLES_PER_BUF * MAX_CHANNELS * MAX_BYTES_PER_SAMPLE; +const TOTAL_AUDIO_BYTES: usize = NUM_SLOTS * BUFFERS_PER_SLOT * MAX_BYTES_PER_BUF; + +const NDSP_OUTPUT_STEREO: c_int = 1; +const NDSP_INTERP_LINEAR: c_int = 1; +const NDSP_FORMAT_MONO_PCM16: u16 = 5; +const NDSP_FORMAT_STEREO_PCM16: u16 = 6; +const NDSP_WBUF_DONE: u8 = 3; + +const Result = c_int; + +const NdspAdpcmData = extern struct { + index: u16, + history0: i16, + history1: i16, +}; + +const NdspWaveBuf = extern struct { + data_vaddr: ?*const anyopaque, + nsamples: u32, + adpcm_data: ?*NdspAdpcmData, + offset: u32, + looping: bool, + status: u8, + sequence_id: u16, + next: ?*NdspWaveBuf, +}; + +extern fn ndspInit() Result; +extern fn ndspExit() void; +extern fn ndspSetOutputMode(mode: c_int) void; +extern fn ndspChnReset(id: c_int) void; +extern fn ndspChnSetInterp(id: c_int, interp: c_int) void; +extern fn ndspChnSetRate(id: c_int, rate: f32) void; +extern fn ndspChnSetFormat(id: c_int, format: u16) void; +extern fn ndspChnSetMix(id: c_int, mix: *[12]f32) void; +extern fn ndspChnWaveBufClear(id: c_int) void; +extern fn ndspChnWaveBufAdd(id: c_int, buf: *NdspWaveBuf) void; +extern fn DSP_FlushDataCache(address: *const anyopaque, size: u32) Result; +extern fn linearAlloc(size: usize) ?*anyopaque; +extern fn linearFree(mem: ?*anyopaque) void; + +const SlotState = enum(u8) { + inactive = 0, + pending = 1, + active = 2, + finished = 3, +}; + +const Slot = struct { + state: SlotState = .inactive, + gain: f32 = 0, + pan: f32 = 0, + stream: Stream 
= undefined, + format: PcmFormat = .{ .sample_rate = 44_100, .channels = 1, .bit_depth = 16 }, + wave_bufs: [BUFFERS_PER_SLOT]NdspWaveBuf = undefined, +}; + +var slots: [NUM_SLOTS]Slot = init_slots(); +var audio_alloc: std.mem.Allocator = undefined; +var audio_io: std.Io = undefined; +var audio_data: ?[*]u8 = null; + +fn init_slots() [NUM_SLOTS]Slot { + var s: [NUM_SLOTS]Slot = undefined; + for (&s) |*slot| { + slot.* = .{}; + } + return s; +} + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + audio_alloc = alloc; + audio_io = io; +} + +pub fn init() anyerror!void { + _ = audio_alloc; + _ = audio_io; + + audio_data = @ptrCast(linearAlloc(TOTAL_AUDIO_BYTES) orelse return error.AudioLinearAllocFailed); + + if (ndspInit() != 0) { + linearFree(audio_data); + audio_data = null; + return error.NdspInitFailed; + } + + ndspSetOutputMode(NDSP_OUTPUT_STEREO); + + for (0..NUM_SLOTS) |i| { + ndspChnReset(@intCast(i)); + slots[i] = .{}; + init_wave_bufs(i); + } +} + +pub fn deinit() void { + if (surface.is_system_closing()) { + slots = init_slots(); + audio_data = null; + return; + } + + for (0..NUM_SLOTS) |i| { + ndspChnWaveBufClear(@intCast(i)); + ndspChnReset(@intCast(i)); + slots[i].state = .inactive; + } + + ndspExit(); + + if (audio_data) |data| { + linearFree(data); + audio_data = null; + } +} + +pub fn update() void { + if (audio_data == null) return; + + for (&slots, 0..) 
|*slot, i| { + switch (slot.state) { + .inactive, .finished => {}, + .pending => start_slot(slot, i) catch { + slot.state = .finished; + }, + .active => refill_done_buffers(slot, i), + } + } +} + +pub fn max_voices() u32 { + return NUM_SLOTS; +} + +pub fn play_slot(slot: u8, stream: Stream) anyerror!void { + if (surface.is_system_closing()) return error.SystemClosing; + if (slot >= NUM_SLOTS) return error.InvalidArgs; + if (!format_supported(stream.format)) return error.UnsupportedFormat; + + const i: usize = slot; + ndspChnWaveBufClear(slot); + slots[i].stream = stream; + slots[i].format = stream.format; + slots[i].state = .pending; +} + +pub fn stop_slot(slot: u8) void { + if (surface.is_system_closing()) return; + if (slot >= NUM_SLOTS) return; + ndspChnWaveBufClear(slot); + slots[slot].state = .inactive; +} + +pub fn set_slot_gain_pan(slot: u8, gain: f32, pan: f32) void { + if (surface.is_system_closing()) return; + if (slot >= NUM_SLOTS) return; + slots[slot].gain = gain; + slots[slot].pan = pan; + if (slots[slot].state == .active) apply_mix(slot, &slots[slot]); +} + +pub fn is_slot_active(slot: u8) bool { + if (slot >= NUM_SLOTS) return false; + return slots[slot].state != .inactive and slots[slot].state != .finished; +} + +fn init_wave_bufs(slot_index: usize) void { + const base = audio_data.?; + const slot_base = slot_index * BUFFERS_PER_SLOT * MAX_BYTES_PER_BUF; + + for (&slots[slot_index].wave_bufs, 0..) 
|*buf, b| { + buf.* = .{ + .data_vaddr = @ptrCast(base + slot_base + b * MAX_BYTES_PER_BUF), + .nsamples = SAMPLES_PER_BUF, + .adpcm_data = null, + .offset = 0, + .looping = false, + .status = NDSP_WBUF_DONE, + .sequence_id = 0, + .next = null, + }; + } +} + +fn start_slot(slot: *Slot, slot_index: usize) !void { + const id: c_int = @intCast(slot_index); + + ndspChnWaveBufClear(id); + ndspChnReset(id); + ndspChnSetInterp(id, NDSP_INTERP_LINEAR); + ndspChnSetRate(id, @floatFromInt(slot.format.sample_rate)); + ndspChnSetFormat(id, if (slot.format.channels == 1) NDSP_FORMAT_MONO_PCM16 else NDSP_FORMAT_STEREO_PCM16); + apply_mix(@intCast(slot_index), slot); + + var queued: bool = false; + for (&slot.wave_bufs) |*buf| { + if (fill_wave_buf(slot, buf)) { + ndspChnWaveBufAdd(id, buf); + queued = true; + } else break; + } + + slot.state = if (queued) .active else .finished; +} + +fn refill_done_buffers(slot: *Slot, slot_index: usize) void { + const id: c_int = @intCast(slot_index); + + for (&slot.wave_bufs) |*buf| { + if (buf.status != NDSP_WBUF_DONE) continue; + if (!fill_wave_buf(slot, buf)) { + slot.state = .finished; + return; + } + ndspChnWaveBufAdd(id, buf); + } +} + +fn fill_wave_buf(slot: *Slot, buf: *NdspWaveBuf) bool { + const byte_count = SAMPLES_PER_BUF * slot.format.frame_size(); + if (byte_count > MAX_BYTES_PER_BUF) return false; + + const raw: [*]u8 = @ptrCast(@constCast(buf.data_vaddr.?)); + const dst = raw[0..byte_count]; + + slot.stream.reader.readSliceAll(dst) catch return false; + _ = DSP_FlushDataCache(buf.data_vaddr.?, @intCast(byte_count)); + + buf.nsamples = SAMPLES_PER_BUF; + buf.offset = 0; + buf.looping = false; + buf.status = NDSP_WBUF_DONE; + buf.next = null; + return true; +} + +fn apply_mix(slot: u8, s: *const Slot) void { + const left = s.gain * std.math.clamp(1.0 - s.pan, 0.0, 1.0); + const right = s.gain * std.math.clamp(1.0 + s.pan, 0.0, 1.0); + var mix: [12]f32 = @splat(0); + mix[0] = std.math.clamp(left, 0.0, 1.0); + mix[1] = 
std.math.clamp(right, 0.0, 1.0); + ndspChnSetMix(slot, &mix); +} + +fn format_supported(fmt: PcmFormat) bool { + return fmt.bit_depth == 16 and (fmt.channels == 1 or fmt.channels == 2); +} diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig new file mode 100644 index 0000000..391d155 --- /dev/null +++ b/src/platform/3ds/3ds_gfx.zig @@ -0,0 +1,1284 @@ +//! Mango/libctru graphics backend for Nintendo 3DS. + +const std = @import("std"); +const Util = @import("../../util/util.zig"); +const Mat4 = @import("../../math/math.zig").Mat4; +const Rendering = @import("../../rendering/rendering.zig"); +const Mesh = Rendering.mesh; +const Texture = Rendering.Texture; +const surface = @import("surface.zig"); +const shaders = @import("aether_shaders"); +const zitrus = @import("zitrus"); +const mango = zitrus.mango; +const pica = zitrus.hardware.pica; +const log = std.log.scoped(.gfx); + +const c = @cImport({ + @cDefine("wint_t", "unsigned int"); + @cInclude("3ds/types.h"); + @cInclude("3ds/gpu/enums.h"); + @cInclude("3ds/gpu/gpu.h"); + @cInclude("3ds/gpu/gx.h"); + @cInclude("3ds/os.h"); + @cInclude("3ds/services/gspgpu.h"); + @cInclude("3ds/gfx.h"); + @cInclude("3ds/allocator/vram.h"); +}); + +var render_alloc: std.mem.Allocator = undefined; +var render_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + render_alloc = alloc; + render_io = io; +} + +const SCREEN_WIDTH: u32 = 800; +const SCREEN_HEIGHT: u32 = 240; +const TARGET_WIDTH: u16 = 240; +const TARGET_HEIGHT: u16 = 800; +const MAX_DEFERRED_MESH_FREES: usize = 4096; +const MAX_TEXTURE_SIZE: u32 = 1024; +const MIN_TEXTURE_SIZE: u32 = 8; +const TEX_BPP: usize = 4; +const CACHE_LINE_SIZE: usize = 32; +const OS_FCRAM_VADDR: usize = 0x30000000; +const OS_FCRAM_SIZE: usize = 0x10000000; +const OS_OLD_FCRAM_VADDR: usize = 0x14000000; +const OS_OLD_FCRAM_SIZE: usize = 0x08000000; + +const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( + c.GX_TRANSFER_FLIP_VERT(0) | + 
c.GX_TRANSFER_OUT_TILED(0) | + c.GX_TRANSFER_RAW_COPY(0) | + c.GX_TRANSFER_IN_FORMAT(c.GX_TRANSFER_FMT_RGBA8) | + c.GX_TRANSFER_OUT_FORMAT(c.GX_TRANSFER_FMT_RGB8) | + c.GX_TRANSFER_SCALING(c.GX_TRANSFER_SCALE_NO), +); + +fn gx_buffer_dim(width: u16, height: u16) u32 { + return (@as(u32, height) << 16) | @as(u32, width); +} + +const VERTEX_STRIDE: usize = @sizeOf(Rendering.Vertex); +const POS_SCALE: [4]f32 = .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }; +const UV_SCALE: [2]f32 = .{ snorm16_scale(), snorm16_scale() }; +const COLOR_SCALE: [4]f32 = .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }; +const ALPHA_TEST_REFERENCE: u8 = 25; +const FOG_LUT_NEAR: f32 = 0.01; + +comptime { + std.debug.assert(VERTEX_STRIDE == 16); + std.debug.assert(@offsetOf(Rendering.Vertex, "pos") == 0); + std.debug.assert(@offsetOf(Rendering.Vertex, "color") == 8); + std.debug.assert(@offsetOf(Rendering.Vertex, "uv") == 12); +} + +const PipelineData = struct { + shader: mango.Shader, + vertex_input: mango.VertexInputLayout, + sampler: mango.Sampler, +}; + +const RenderTargetData = struct { + color_memory: mango.DeviceMemory, + depth_memory: mango.DeviceMemory, + color_image: mango.Image, + depth_image: mango.Image, + color_view: mango.ImageView, + depth_view: mango.ImageView, + color_pixels: []u8, +}; + +const MeshData = struct { + memory: mango.DeviceMemory = .null, + buffer: mango.Buffer = .null, + mapped: []u8 = &.{}, + len: usize = 0, + capacity: usize = 0, +}; + +const DeferredMeshFree = struct { + memory: mango.DeviceMemory, + buffer: mango.Buffer, +}; + +const TextureData = struct { + width: u32, + height: u32, + upload_mode: TextureUploadMode, + memory: mango.DeviceMemory, + image: mango.Image, + view: mango.ImageView, +}; + +const TextureUploadMode = enum { + cpu_tiled, + transfer_tiled, +}; + +const texenv_primary: mango.TextureCombinerUnit = .{ + .color_src = @splat(.primary_color), + .alpha_src = @splat(.primary_color), + .color_factor = 
@splat(.src_color), + .alpha_factor = @splat(.src_alpha), + .color_op = .replace, + .alpha_op = .replace, + .color_scale = .@"1x", + .alpha_scale = .@"1x", + .constant = @splat(0xFF), +}; + +const texenv_texture_modulate_primary: mango.TextureCombinerUnit = .{ + .color_src = .{ .primary_color, .texture_0, .primary_color }, + .alpha_src = .{ .primary_color, .texture_0, .primary_color }, + .color_factor = @splat(.src_color), + .alpha_factor = @splat(.src_alpha), + .color_op = .modulate, + .alpha_op = .modulate, + .color_scale = .@"1x", + .alpha_scale = .@"1x", + .constant = @splat(0xFF), +}; + +const texenv_untextured = [_]mango.TextureCombinerUnit{ + texenv_primary, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, +}; + +const texenv_textured = [_]mango.TextureCombinerUnit{ + texenv_primary, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + mango.TextureCombinerUnit.previous, + texenv_texture_modulate_primary, +}; + +const texenv_buffer_sources = [_]mango.TextureCombinerUnit.BufferSources{ + .previous, + .previous, + .previous, + .previous, +}; + +const FogState = struct { + enabled: bool = false, + start: f32 = 0.0, + end: f32 = 1.0, + color: [4]u8 = .{ 0, 0, 0, 255 }, + table: [128]u32 = @splat(0), +}; + +var meshes = Util.CircularBuffer(MeshData, 2048).init(); +var deferred_mesh_frees = Util.CircularBuffer(DeferredMeshFree, MAX_DEFERRED_MESH_FREES + 1).init(); +var textures = Util.CircularBuffer(TextureData, 64).init(); + +var device: mango.Device = .null; +var submit_queue: mango.Queue = .null; +var fill_queue: mango.Queue = .null; +var command_pool: mango.CommandPool = .null; +var command_buffer: mango.CommandBuffer = .null; +var render_pipeline: PipelineData = undefined; +var render_target: RenderTargetData = undefined; +var render_pipeline_initialized = 
false; +var render_target_initialized = false; +var command_resources_initialized = false; + +var projection_transform: Mat4 = Mat4.identity(); +var initialized = false; +var frame_started = false; +var trace_frames_remaining: u8 = 0; +var trace_frame_active = false; +var trace_frame_index: u8 = 0; +var trace_submit_index: u8 = 0; +var vsync_enabled = true; +var clear_color: [4]u8 = .{ 0, 0, 0, 255 }; +var alpha_blend_enabled = true; +var depth_write_enabled = true; +var cull_face_enabled = true; +var fog_state: FogState = .{}; +var uv_offset: [2]f32 = .{ 0.0, 0.0 }; +var proj_matrix: Mat4 = Mat4.identity(); +var view_matrix: Mat4 = Mat4.identity(); +var default_texture: Texture.Handle = 0; +var bound_texture: Texture.Handle = 0; +var bound_mesh: Mesh.Handle = 0; + +pub fn init() anyerror!void { + _ = render_io; + + c.gfxInitDefault(); + errdefer c.gfxExit(); + c.gfxSetWide(true); + + device = try mango.createAetherCtruBackedDevice(.{ .linear_gpa = render_alloc }, render_alloc); + errdefer { + device.destroy(); + device = .null; + } + + submit_queue = device.getQueue(.submit); + fill_queue = device.getQueue(.fill); + + try init_command_resources(); + errdefer deinit_command_resources(); + + try init_render_target(); + errdefer deinit_render_target(); + + render_pipeline = try init_pipeline(); + render_pipeline_initialized = true; + errdefer { + deinit_pipeline(&render_pipeline); + render_pipeline_initialized = false; + } + + init_projection_transform(); + initialized = true; + frame_started = false; +} + +pub fn deinit() void { + if (surface.is_system_closing()) { + abandon_service_resources(); + return; + } + + frame_started = false; + if (initialized and device != .null) device.waitIdle(); + free_deferred_mesh_resources(); + + for (1..textures.buffer.len) |i| { + if (textures.buffer[i]) |*tex| free_texture(tex); + } + textures.clear(); + + if (render_pipeline_initialized) { + deinit_pipeline(&render_pipeline); + render_pipeline_initialized = false; + } + + for 
(1..meshes.buffer.len) |i| { + if (meshes.buffer[i]) |*mesh| free_mesh(mesh); + } + meshes.clear(); + + deinit_render_target(); + deinit_command_resources(); + + if (device != .null) { + device.destroy(); + device = .null; + } + + if (initialized) { + c.gfxExit(); + initialized = false; + } +} + +pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { + const next = [4]u8{ + float_to_u8(r), + float_to_u8(g), + float_to_u8(b), + float_to_u8(a), + }; + if (std.mem.eql(u8, &clear_color, &next)) return; + clear_color = next; +} + +pub fn set_alpha_blend(enabled: bool) void { + if (alpha_blend_enabled == enabled) return; + alpha_blend_enabled = enabled; + if (frame_started) apply_dynamic_state(); +} + +pub fn set_depth_write(enabled: bool) void { + if (depth_write_enabled == enabled) return; + depth_write_enabled = enabled; + if (frame_started) apply_dynamic_state(); +} + +pub fn set_fog(enabled: bool, start: f32, end: f32, r: f32, g: f32, b: f32) void { + const color = [4]u8{ float_to_u8(r), float_to_u8(g), float_to_u8(b), 255 }; + if (fog_state.enabled == enabled and float_bits_equal(fog_state.start, start) and float_bits_equal(fog_state.end, end) and std.mem.eql(u8, &fog_state.color, &color)) return; + + fog_state.enabled = enabled; + fog_state.start = start; + fog_state.end = end; + fog_state.color = color; + rebuild_fog_table(); + if (frame_started) apply_fog_state(); +} + +pub fn set_clip_planes(_: bool) void {} + +pub fn set_culling(enabled: bool) void { + if (cull_face_enabled == enabled) return; + cull_face_enabled = enabled; + if (frame_started) apply_dynamic_state(); +} + +pub fn set_uv_offset(u: f32, v: f32) void { + if (float_bits_equal(uv_offset[0], u) and float_bits_equal(uv_offset[1], v)) return; + uv_offset = .{ u, v }; + if (frame_started) upload_frame_uniforms(); +} + +pub fn trace_next_frames(count: u8) void { + trace_frames_remaining = count; +} + +pub fn set_proj_matrix(mat: *const Mat4) void { + if (mat4_bits_equal(&proj_matrix, mat)) 
return; + proj_matrix = mat.*; + if (frame_started) upload_frame_uniforms(); +} + +pub fn set_view_matrix(mat: *const Mat4) void { + if (mat4_bits_equal(&view_matrix, mat)) return; + view_matrix = mat.*; +} + +pub fn start_frame() bool { + if (surface.is_system_closing()) return false; + if (!initialized or frame_started) return false; + + trace_frame_active = trace_frames_remaining > 0; + if (trace_frame_active) { + trace_frame_index += 1; + trace_submit_index = 0; + log.info("trace: 3ds frame {d} start_frame begin", .{trace_frame_index}); + } + + free_deferred_mesh_resources(); + + frame_started = true; + clear_frame_targets() catch { + if (trace_frame_active) log.info("trace: 3ds frame {d} clear targets failed", .{trace_frame_index}); + frame_started = false; + trace_frame_active = false; + return false; + }; + if (trace_frame_active) log.info("trace: 3ds frame {d} clear targets done", .{trace_frame_index}); + begin_command_buffer() catch { + if (trace_frame_active) log.info("trace: 3ds frame {d} begin command buffer failed", .{trace_frame_index}); + frame_started = false; + trace_frame_active = false; + return false; + }; + if (trace_frame_active) log.info("trace: 3ds frame {d} start_frame end", .{trace_frame_index}); + return true; +} + +pub fn end_frame() void { + if (!frame_started) return; + if (trace_frame_active) log.info("trace: 3ds frame {d} end_frame begin", .{trace_frame_index}); + if (surface.is_system_closing()) { + frame_started = false; + trace_frame_active = false; + return; + } + + finish_command_buffer() catch { + if (trace_frame_active) log.info("trace: 3ds frame {d} finish command buffer failed", .{trace_frame_index}); + frame_started = false; + trace_frame_active = false; + return; + }; + if (trace_frame_active) log.info("trace: 3ds frame {d} command buffer finished", .{trace_frame_index}); + if (surface.is_system_closing()) { + frame_started = false; + trace_frame_active = false; + return; + } + if (trace_frame_active) log.info("trace: 3ds 
frame {d} present begin", .{trace_frame_index}); + present_render_target(); + if (trace_frame_active) { + log.info("trace: 3ds frame {d} present end", .{trace_frame_index}); + trace_frames_remaining -= 1; + } + frame_started = false; + trace_frame_active = false; +} + +pub fn clear_depth() void { + if (!frame_started) return; + + finish_command_buffer() catch { + frame_started = false; + return; + }; + clear_depth_target() catch { + frame_started = false; + return; + }; + begin_command_buffer() catch { + frame_started = false; + return; + }; +} + +pub fn set_vsync(v: bool) void { + vsync_enabled = v; +} + +pub fn create_mesh() anyerror!Mesh.Handle { + const handle = meshes.add_element(.{}) orelse return error.OutOfMeshes; + + return @intCast(handle); +} + +pub fn destroy_mesh(handle: Mesh.Handle) void { + if (surface.is_system_closing()) { + _ = meshes.remove_element(handle); + return; + } + + if (mesh_slot(handle)) |mesh| free_mesh(mesh); + if (bound_mesh == handle) bound_mesh = 0; + _ = meshes.remove_element(handle); +} + +pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { + if (surface.is_system_closing()) return; + + const mesh = mesh_slot(handle) orelse return; + if (data.len > std.math.maxInt(u32)) { + std.debug.panic("3ds_gfx: mesh vertex data is too large to flush", .{}); + } + + if (data.len == 0) { + mesh.len = 0; + return; + } + + if (is_linear_fcram(data.ptr, data.len)) { + bind_mesh_to_linear_slice(mesh, data) catch + std.debug.panic("3ds_gfx: out of memory for mesh buffer handle", .{}); + flush_linear_memory(data.ptr, data.len) catch {}; + if (frame_started and bound_mesh == handle) bound_mesh = 0; + return; + } + + ensure_mesh_capacity(mesh, data.len) catch + std.debug.panic("3ds_gfx: out of linear memory for mesh upload", .{}); + + @memcpy(mesh.mapped[0..data.len], data); + mesh.len = data.len; + flush_memory(mesh.memory, data.len) catch {}; + if (frame_started and bound_mesh == handle) bound_mesh = 0; +} + +pub fn draw_mesh(handle: 
Mesh.Handle, model: *const Mat4, count: usize) void { + if (surface.is_system_closing()) return; + if (!frame_started or !render_pipeline_initialized) return; + + const mesh = mesh_slot(handle) orelse return; + if (count == 0 or mesh.len == 0 or mesh.buffer == .null) return; + + const needed = mesh_draw_bytes_needed(count) orelse return; + if (needed > mesh.len) return; + + upload_draw_uniforms(model); + + if (bound_mesh != handle) { + command_buffer.bindVertexBuffersSlice(0, &.{mesh.buffer}, &.{0}); + bound_mesh = handle; + } + + command_buffer.draw(@intCast(@min(count, std.math.maxInt(u32))), 0); +} + +pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { + try validate_texture(width, height, data); + + var tex = try create_texture_resources(width, height); + errdefer free_texture(&tex); + + try upload_texture_data(&tex, data[0..texture_size(width, height)]); + + const handle: Texture.Handle = @intCast(textures.add_element(tex) orelse return error.OutOfTextures); + if (default_texture == 0) default_texture = handle; + return handle; +} + +pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { + if (surface.is_system_closing()) return; + + const tex = texture_slot(handle) orelse return; + const size = texture_size(tex.width, tex.height); + if (data.len < size) return; + + upload_texture_data(tex, data[0..size]) catch return; + if (frame_started and bound_texture == handle) bind_current_texture(); +} + +pub fn bind_texture(handle: Texture.Handle) void { + if (surface.is_system_closing()) return; + if (bound_texture == handle) return; + bound_texture = handle; + if (frame_started) bind_current_texture(); +} + +pub fn destroy_texture(handle: Texture.Handle) void { + if (surface.is_system_closing()) { + if (bound_texture == handle) bound_texture = 0; + if (default_texture == handle) default_texture = 0; + _ = textures.remove_element(handle); + return; + } + + if (texture_slot(handle)) |tex| 
free_texture(tex); + if (bound_texture == handle) bound_texture = 0; + if (default_texture == handle) default_texture = 0; + _ = textures.remove_element(handle); + if (frame_started) bind_current_texture(); +} + +pub fn force_texture_resident(_: Texture.Handle) void {} + +fn init_command_resources() !void { + command_pool = try device.createCommandPool(.no_preheat, null); + errdefer { + device.destroyCommandPool(command_pool, null); + command_pool = .null; + } + + var buffers: [1]mango.CommandBuffer = undefined; + try device.allocateCommandBuffers(.{ + .pool = command_pool, + .command_buffer_count = buffers.len, + }, &buffers); + command_buffer = buffers[0]; + command_resources_initialized = true; +} + +fn deinit_command_resources() void { + if (!command_resources_initialized or device == .null) return; + if (command_buffer != .null) { + device.freeCommandBuffers(command_pool, &.{command_buffer}); + command_buffer = .null; + } + if (command_pool != .null) { + device.destroyCommandPool(command_pool, null); + command_pool = .null; + } + command_resources_initialized = false; +} + +fn init_render_target() !void { + const color_size = mango.Format.a8b8g8r8_unorm.scale(@as(usize, TARGET_WIDTH) * TARGET_HEIGHT); + const depth_size = mango.Format.d24_unorm_s8_uint.scale(@as(usize, TARGET_WIDTH) * TARGET_HEIGHT); + + const color_memory = try device.allocateMemory(.{ + .memory_type = .vram_a, + .allocation_size = .size(@intCast(color_size)), + }, null); + errdefer device.freeMemory(color_memory, null); + + const depth_memory = try device.allocateMemory(.{ + .memory_type = .vram_b, + .allocation_size = .size(@intCast(depth_size)), + }, null); + errdefer device.freeMemory(depth_memory, null); + + const color_image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .transfer_src = true, .color_attachment = true }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + .format = .a8b8g8r8_unorm, + .mip_levels = .@"1", + 
.array_layers = .@"1", + }, null); + errdefer device.destroyImage(color_image, null); + try device.bindImageMemory(color_image, color_memory, .size(0)); + + const depth_image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .depth_stencil_attachment = true }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + .format = .d24_unorm_s8_uint, + .mip_levels = .@"1", + .array_layers = .@"1", + }, null); + errdefer device.destroyImage(depth_image, null); + try device.bindImageMemory(depth_image, depth_memory, .size(0)); + + const color_view = try device.createImageView(.{ + .type = .@"2d", + .format = .a8b8g8r8_unorm, + .image = color_image, + .subresource_range = .full, + }, null); + errdefer device.destroyImageView(color_view, null); + + const depth_view = try device.createImageView(.{ + .type = .@"2d", + .format = .d24_unorm_s8_uint, + .image = depth_image, + .subresource_range = .full, + }, null); + errdefer device.destroyImageView(depth_view, null); + + const color_pixels = try device.mapMemory(color_memory, .size(0), .whole); + + render_target = .{ + .color_memory = color_memory, + .depth_memory = depth_memory, + .color_image = color_image, + .depth_image = depth_image, + .color_view = color_view, + .depth_view = depth_view, + .color_pixels = color_pixels, + }; + render_target_initialized = true; +} + +fn deinit_render_target() void { + if (!render_target_initialized or device == .null) return; + device.unmapMemory(render_target.color_memory); + device.destroyImageView(render_target.depth_view, null); + device.destroyImageView(render_target.color_view, null); + device.destroyImage(render_target.depth_image, null); + device.destroyImage(render_target.color_image, null); + device.freeMemory(render_target.depth_memory, null); + device.freeMemory(render_target.color_memory, null); + render_target_initialized = false; +} + +fn init_pipeline() !PipelineData { + const code: []const u8 = &shaders.basic_vert; + 
const shader = try device.createShader(.init(.psh, code, "main"), null); + errdefer device.destroyShader(shader, null); + + const bindings = [_]mango.VertexInputBindingDescription{ + .{ .stride = VERTEX_STRIDE }, + }; + const attributes = [_]mango.VertexInputAttributeDescription{ + .{ .location = .v0, .binding = .@"0", .format = .r16g16b16a16_sscaled, .offset = 0 }, + .{ .location = .v1, .binding = .@"0", .format = .r8g8b8a8_uscaled, .offset = 8 }, + .{ .location = .v2, .binding = .@"0", .format = .r16g16_sscaled, .offset = 12 }, + }; + const vertex_input = try device.createVertexInputLayout(.init(&bindings, &attributes, &.{}), null); + errdefer device.destroyVertexInputLayout(vertex_input, null); + + const sampler = try device.createSampler(.{ + .mag_filter = .nearest, + .min_filter = .nearest, + .mip_filter = .nearest, + .address_mode_u = .repeat, + .address_mode_v = .repeat, + .lod_bias = 0.0, + .min_lod = 0, + .max_lod = 0, + .border_color = .{ 0, 0, 0, 0 }, + }, null); + + return .{ + .shader = shader, + .vertex_input = vertex_input, + .sampler = sampler, + }; +} + +fn deinit_pipeline(pl: *PipelineData) void { + device.destroySampler(pl.sampler, null); + device.destroyVertexInputLayout(pl.vertex_input, null); + device.destroyShader(pl.shader, null); +} + +fn clear_frame_targets() !void { + try fill_queue.clearColorImage(.{ + .image = render_target.color_image, + .color = clear_color, + .subresource_range = .full, + }); + try clear_depth_target(); +} + +fn clear_depth_target() !void { + try fill_queue.clearDepthStencilImage(.{ + .image = render_target.depth_image, + .depth = 1.0, + .stencil = 0, + .subresource_range = .full, + }); + // device.waitIdle(); +} + +fn begin_command_buffer() !void { + bound_mesh = 0; + + try command_buffer.begin(); + command_buffer.bindShaders(&.{.vertex}, &.{render_pipeline.shader}); + command_buffer.setVertexInput(render_pipeline.vertex_input); + command_buffer.setLightingEnable(false); + 
command_buffer.setLightEnvironmentEnable(.{}); + command_buffer.setLogicOpEnable(false); + command_buffer.setAlphaTestCompareOp(.gt); + command_buffer.setAlphaTestReference(ALPHA_TEST_REFERENCE); + command_buffer.setStencilTestEnable(false); + command_buffer.setDepthTestEnable(true); + command_buffer.setDepthMode(.z_buffer); + command_buffer.setDepthCompareOp(.lt); + command_buffer.setPrimitiveTopology(.triangle_list); + command_buffer.setFrontFace(.ccw); + command_buffer.setColorWriteMask(.rgba); + command_buffer.setViewport(.{ + .rect = .{ .offset = .{ .x = 0, .y = 0 }, .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT } }, + .min_depth = 0.0, + .max_depth = 1.0, + }); + command_buffer.setScissor(.inside(.{ + .offset = .{ .x = 0, .y = 0 }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + })); + apply_dynamic_state(); + bind_current_texture(); + apply_fog_state(); + upload_frame_uniforms(); + command_buffer.beginRendering(.{ + .color_attachment = render_target.color_view, + .depth_stencil_attachment = render_target.depth_view, + }); +} + +fn finish_command_buffer() !void { + if (trace_frame_active) { + trace_submit_index += 1; + log.info("trace: 3ds frame {d} submit {d} begin", .{ trace_frame_index, trace_submit_index }); + } + command_buffer.endRendering(); + if (trace_frame_active) log.info("trace: 3ds frame {d} submit {d} end rendering done", .{ trace_frame_index, trace_submit_index }); + try command_buffer.end(); + if (trace_frame_active) log.info("trace: 3ds frame {d} submit {d} command buffer end done", .{ trace_frame_index, trace_submit_index }); + if (surface.is_system_closing()) return; + try submit_queue.submit(.{ .command_buffer = command_buffer }); + if (trace_frame_active) log.info("trace: 3ds frame {d} submit {d} queued", .{ trace_frame_index, trace_submit_index }); +} + +fn present_render_target() void { + if (surface.is_system_closing()) return; + var fb_width: u16 = 0; + var fb_height: u16 = 0; + const framebuffer = 
c.gfxGetFramebuffer(c.GFX_TOP, c.GFX_LEFT, &fb_width, &fb_height) orelse return; + if (surface.is_system_closing()) return; + + _ = c.GX_DisplayTransfer( + @ptrCast(@alignCast(render_target.color_pixels.ptr)), + gx_buffer_dim(TARGET_WIDTH, TARGET_HEIGHT), + @ptrCast(@alignCast(framebuffer)), + gx_buffer_dim(TARGET_WIDTH, TARGET_HEIGHT), + DISPLAY_TRANSFER_FLAGS, + ); + if (surface.is_system_closing()) return; + if (trace_frame_active) log.info("trace: 3ds frame {d} present wait ppf begin", .{trace_frame_index}); + c.gspWaitForEvent(c.GSPGPU_EVENT_PPF, false); + if (trace_frame_active) log.info("trace: 3ds frame {d} present wait ppf end", .{trace_frame_index}); + if (surface.is_system_closing()) return; + c.gfxSwapBuffers(); + if (trace_frame_active) log.info("trace: 3ds frame {d} swap buffers done", .{trace_frame_index}); + if (surface.is_system_closing()) return; + if (trace_frame_active and vsync_enabled) log.info("trace: 3ds frame {d} wait vblank begin", .{trace_frame_index}); + // PPF can cross VBlank. Waiting for the "next" VBlank here would discard + // that event and force an extra display interval, effectively capping at + // 30 FPS when the frame finishes near the refresh boundary. 
+ if (vsync_enabled) c.gspWaitForEvent(c.GSPGPU_EVENT_VBlank0, false); + if (trace_frame_active and vsync_enabled) log.info("trace: 3ds frame {d} wait vblank end", .{trace_frame_index}); +} + +fn apply_dynamic_state() void { + command_buffer.setDepthWriteEnable(depth_write_enabled); + command_buffer.setCullMode(if (cull_face_enabled) .back else .none); + command_buffer.setAlphaTestEnable(alpha_blend_enabled); + command_buffer.setBlendEquation(if (alpha_blend_enabled) .{ + .src_color_factor = .src_alpha, + .dst_color_factor = .one_minus_src_alpha, + .color_op = .add, + .src_alpha_factor = .one, + .dst_alpha_factor = .one_minus_src_alpha, + .alpha_op = .add, + } else .{ + .src_color_factor = .one, + .dst_color_factor = .zero, + .color_op = .add, + .src_alpha_factor = .one, + .dst_alpha_factor = .zero, + .alpha_op = .add, + }); +} + +fn bind_current_texture() void { + if (!frame_started) return; + + const effective_texture = if (bound_texture != 0) bound_texture else default_texture; + if (effective_texture == 0) { + command_buffer.bindCombinedImageSamplers(0, &.{mango.CombinedImageSampler.none}); + bind_texenv(false); + return; + } + + const tex = texture_slot(effective_texture) orelse { + command_buffer.bindCombinedImageSamplers(0, &.{mango.CombinedImageSampler.none}); + bind_texenv(false); + return; + }; + + command_buffer.bindCombinedImageSamplers(0, &.{.{ + .image = tex.view, + .sampler = render_pipeline.sampler, + }}); + bind_texenv(true); +} + +fn bind_texenv(textured: bool) void { + const stages = if (textured) &texenv_textured else &texenv_untextured; + command_buffer.setTextureCombiners(stages, &texenv_buffer_sources); +} + +fn apply_fog_state() void { + command_buffer.setAetherFog(fog_state.enabled, fog_state.color, if (fog_state.enabled) &fog_state.table else &.{}); +} + +fn upload_frame_uniforms() void { + const projection_rows = mat4_to_uniform_rows(Mat4.mul(proj_matrix, projection_transform)); + const constants = [3][4]f32{ + POS_SCALE, + .{ 
UV_SCALE[0], UV_SCALE[1], uv_offset[0], uv_offset[1] }, + COLOR_SCALE, + }; + + command_buffer.bindFloatUniforms(.vertex, 0, &projection_rows); + command_buffer.bindFloatUniforms(.vertex, 8, &constants); +} + +fn upload_draw_uniforms(model: *const Mat4) void { + const model_view_rows = mat4_to_uniform_rows(Mat4.mul(model.*, view_matrix)); + command_buffer.bindFloatUniforms(.vertex, 4, &model_view_rows); +} + +fn mat4_to_uniform_rows(mat: Mat4) [4][4]f32 { + var out: [4][4]f32 = undefined; + inline for (0..4) |row| { + out[row] = .{ mat.data[0][row], mat.data[1][row], mat.data[2][row], mat.data[3][row] }; + } + return out; +} + +fn mat4_bits_equal(a: *const Mat4, b: *const Mat4) bool { + return std.mem.eql(u8, std.mem.asBytes(a), std.mem.asBytes(b)); +} + +fn float_bits_equal(a: f32, b: f32) bool { + return @as(u32, @bitCast(a)) == @as(u32, @bitCast(b)); +} + +fn init_projection_transform() void { + projection_transform = Mat4.mul(logical_viewport_transform(), ortho_tilt(0.0, @floatFromInt(SCREEN_WIDTH), 0.0, @floatFromInt(SCREEN_HEIGHT), 0.0, 1.0)); +} + +fn logical_viewport_transform() Mat4 { + return .{ .data = .{ + .{ @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5, 0.0, 0.0, 0.0 }, + .{ 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5, 0.0, 0.0 }, + .{ 0.0, 0.0, -1.0, 0.0 }, + .{ @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5, 1.0, 1.0 }, + } }; +} + +// Aether Mat4 uses row-vector multiplication. This is Citro3D's +// Mtx_OrthoTilt transposed into that convention; mat4_to_uniform_rows() +// transposes it back for the PICA shader's matrix * vector dp4 sequence. 
+fn ortho_tilt(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) Mat4 { + const rl = right - left; + const tb = top - bottom; + const fnv = far - near; + return .{ .data = .{ + .{ 0.0, -2.0 / rl, 0.0, 0.0 }, + .{ 2.0 / tb, 0.0, 0.0, 0.0 }, + .{ 0.0, 0.0, 1.0 / fnv, 0.0 }, + .{ -((top + bottom) / tb), (right + left) / rl, 0.5 * ((near + far) / (near - far)) - 0.5, 1.0 }, + } }; +} + +fn ensure_mesh_capacity(mesh: *MeshData, len: usize) !void { + if (mesh.memory != .null and mesh.capacity >= len and mesh.buffer != .null) return; + + free_mesh(mesh); + + const cap = std.mem.alignForward(usize, @max(len, 256), CACHE_LINE_SIZE); + const memory = try device.allocateMemory(.{ + .memory_type = .fcram_cached, + .allocation_size = .size(@intCast(cap)), + }, null); + errdefer device.freeMemory(memory, null); + + const buffer = try device.createBuffer(.{ + .size = .size(@intCast(cap)), + .usage = .{ .vertex_buffer = true }, + }, null); + errdefer device.destroyBuffer(buffer, null); + + try device.bindBufferMemory(buffer, memory, .size(0)); + const mapped = try device.mapMemory(memory, .size(0), .whole); + if (!is_linear_fcram(mapped.ptr, mapped.len)) { + std.debug.panic("3ds_gfx: mesh upload buffer must be allocated in linear FCRAM", .{}); + } + + mesh.* = .{ + .memory = memory, + .buffer = buffer, + .mapped = mapped, + .capacity = cap, + .len = 0, + }; +} + +fn bind_mesh_to_linear_slice(mesh: *MeshData, data: []const u8) !void { + if (mesh.memory != .null) free_mesh(mesh); + + const buffer = if (mesh.buffer != .null) + mesh.buffer + else + try create_external_vertex_buffer(); + + mango.updateAetherLinearVertexBuffer(buffer, data.ptr, data.len, @intCast(c.osConvertVirtToPhys(data.ptr))); + + mesh.* = .{ + .memory = .null, + .buffer = buffer, + .mapped = &.{}, + .len = data.len, + .capacity = data.len, + }; +} + +fn create_external_vertex_buffer() !mango.Buffer { + return mango.createAetherLinearVertexBuffer(render_alloc); +} + +fn free_mesh(mesh: *MeshData) 
void { + if (mesh.buffer != .null or mesh.memory != .null) { + if (frame_started) { + defer_mesh_free(.{ .memory = mesh.memory, .buffer = mesh.buffer }); + } else { + destroy_mesh_resources(mesh.memory, mesh.buffer); + } + } + mesh.* = .{}; +} + +fn destroy_mesh_resources(memory: mango.DeviceMemory, buffer: mango.Buffer) void { + if (buffer != .null) device.destroyBuffer(buffer, null); + if (memory != .null) { + device.unmapMemory(memory); + device.freeMemory(memory, null); + } +} + +fn defer_mesh_free(free: DeferredMeshFree) void { + if (deferred_mesh_frees.add_element(free) != null) return; + std.debug.panic("3ds_gfx: deferred mesh free queue exhausted", .{}); +} + +fn free_deferred_mesh_resources() void { + for (1..deferred_mesh_frees.buffer.len) |i| { + if (deferred_mesh_frees.buffer[i]) |free| { + destroy_mesh_resources(free.memory, free.buffer); + } + } + deferred_mesh_frees.clear(); +} + +fn abandon_service_resources() void { + frame_started = false; + initialized = false; + render_pipeline_initialized = false; + render_target_initialized = false; + command_resources_initialized = false; + device = .null; + submit_queue = .null; + fill_queue = .null; + default_texture = 0; + bound_texture = 0; + meshes.clear(); + textures.clear(); + deferred_mesh_frees.clear(); +} + +fn mesh_draw_bytes_needed(count: usize) ?usize { + if (count == 0) return 0; + if (count > std.math.maxInt(usize) / VERTEX_STRIDE) return null; + return count * VERTEX_STRIDE; +} + +fn create_texture_resources(width: u32, height: u32) !TextureData { + const size = texture_size(width, height); + const upload_mode = texture_upload_mode(width, height); + // 3DSX launchers can map VRAM read-only for the CPU. Keep texture + // storage in linear FCRAM; PICA can still sample it by physical address, + // and the transfer queue can still write tiled output here. 
+ const memory = try device.allocateMemory(.{ + .memory_type = .fcram_cached, + .allocation_size = .size(size), + }, null); + errdefer device.freeMemory(memory, null); + + const image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .sampled = true, .transfer_dst = true }, + .extent = .{ .width = @intCast(width), .height = @intCast(height) }, + .format = .a8b8g8r8_unorm, + .mip_levels = .@"1", + .array_layers = .@"1", + }, null); + errdefer device.destroyImage(image, null); + try device.bindImageMemory(image, memory, .size(0)); + + const view = try device.createImageView(.{ + .type = .@"2d", + .format = .a8b8g8r8_unorm, + .image = image, + .subresource_range = .full, + }, null); + + return .{ + .width = width, + .height = height, + .upload_mode = upload_mode, + .memory = memory, + .image = image, + .view = view, + }; +} + +fn free_texture(tex: *TextureData) void { + device.destroyImageView(tex.view, null); + device.destroyImage(tex.image, null); + device.freeMemory(tex.memory, null); +} + +fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { + if (width < MIN_TEXTURE_SIZE or height < MIN_TEXTURE_SIZE) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too small; 3DS requires at least {d}x{d}", .{ + width, + height, + MIN_TEXTURE_SIZE, + MIN_TEXTURE_SIZE, + }); + return error.TextureTooSmall; + } + if (width > MAX_TEXTURE_SIZE or height > MAX_TEXTURE_SIZE) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too large; 3DS limit is {d}x{d}", .{ + width, + height, + MAX_TEXTURE_SIZE, + MAX_TEXTURE_SIZE, + }); + return error.TextureTooLarge; + } + if (!std.math.isPowerOfTwo(width) or !std.math.isPowerOfTwo(height)) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is unsupported; 3DS textures require power-of-two dimensions", .{ width, height }); + return error.UnsupportedTextureSize; + } + + const size = texture_size(width, height); + if (data.len < size) return error.InsufficientData; 
+} + +fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { + switch (tex.upload_mode) { + .cpu_tiled => return upload_texture_data_cpu(tex, data), + .transfer_tiled => return upload_texture_data_transfer(tex, data), + } +} + +fn upload_texture_data_cpu(tex: *TextureData, data: []align(16) const u8) !void { + const mapped = try device.mapMemory(tex.memory, .size(0), .whole); + defer device.unmapMemory(tex.memory); + + @memset(mapped, 0); + convert_texture_data_tiled_abgr(mapped, data, tex.width, tex.height); + try flush_memory(tex.memory, mapped.len); + device.waitIdle(); +} + +fn upload_texture_data_transfer(tex: *TextureData, data: []align(16) const u8) !void { + const size = texture_size(tex.width, tex.height); + + const staging_memory = try device.allocateMemory(.{ + .memory_type = .fcram_cached, + .allocation_size = .size(size), + }, null); + defer device.freeMemory(staging_memory, null); + + const staging_buffer = try device.createBuffer(.{ + .size = .size(size), + .usage = .{ .transfer_src = true }, + }, null); + defer device.destroyBuffer(staging_buffer, null); + try device.bindBufferMemory(staging_buffer, staging_memory, .size(0)); + + const mapped = try device.mapMemory(staging_memory, .size(0), .size(size)); + defer device.unmapMemory(staging_memory); + + convert_texture_data_linear_abgr(mapped, data, tex.width, tex.height); + try flush_memory(staging_memory, mapped.len); + + try device.getQueue(.transfer).copyBufferToImage(.{ + .src_buffer = staging_buffer, + .src_offset = .size(0), + .dst_image = tex.image, + .dst_subresource = .full, + }); + device.waitIdle(); +} + +fn convert_texture_data_tiled_abgr(dst: []u8, src: []align(16) const u8, width: u32, height: u32) void { + for (0..height) |y| { + const yu: u32 = @intCast(y); + const dst_y = height - 1 - yu; + for (0..width) |x| { + const xu: u32 = @intCast(x); + const src_off = (@as(usize, yu) * width + xu) * TEX_BPP; + const dst_off = tiled_pixel_offset(width, xu, dst_y); + 
write_abgr8888(dst[dst_off..][0..TEX_BPP], src[src_off..][0..TEX_BPP]); + } + } +} + +fn convert_texture_data_linear_abgr(dst: []u8, src: []align(16) const u8, width: u32, height: u32) void { + for (0..height) |y| { + const yu: u32 = @intCast(y); + const src_y = height - 1 - yu; + for (0..width) |x| { + const xu: u32 = @intCast(x); + const src_off = (@as(usize, src_y) * width + xu) * TEX_BPP; + const dst_off = (@as(usize, yu) * width + xu) * TEX_BPP; + write_abgr8888(dst[dst_off..][0..TEX_BPP], src[src_off..][0..TEX_BPP]); + } + } +} + +fn write_abgr8888(dst: []u8, src_rgba: []const u8) void { + dst[0] = src_rgba[3]; + dst[1] = src_rgba[2]; + dst[2] = src_rgba[1]; + dst[3] = src_rgba[0]; +} + +fn tiled_pixel_offset(width: u32, x: u32, y: u32) usize { + const tile_size = 8; + const tile_pixels = tile_size * tile_size; + const tile_x = x / tile_size; + const tile_y = y / tile_size; + const tiles_per_row = width / tile_size; + const subtile_x: u3 = @intCast(x & (tile_size - 1)); + const subtile_y: u3 = @intCast(y & (tile_size - 1)); + const subtile = pica.morton.toIndex(u3, 2, .{ subtile_x, subtile_y }); + const pixel = (tile_y * tiles_per_row + tile_x) * tile_pixels + subtile; + return @as(usize, pixel) * TEX_BPP; +} + +fn texture_upload_mode(width: u32, height: u32) TextureUploadMode { + // GX_DisplayTransfer needs at least 64x16 for linear<->tiled transfers. + // Smaller textures are still valid sampled images, so keep them on the + // CPU Morton path. + return if (width >= 64 and height >= 16) .transfer_tiled else .cpu_tiled; +} + +fn texture_size(width: u32, height: u32) u32 { + return @intCast(@as(usize, width) * height * TEX_BPP); +} + +fn rebuild_fog_table() void { + const safe_end = if (fog_state.end <= fog_state.start) fog_state.start + 0.001 else fog_state.end; + const lut_near = @min(FOG_LUT_NEAR, safe_end * 0.5); + const lut_far = @max(safe_end, lut_near + 0.001); + + var values: [129]f32 = undefined; + for (&values, 0..) 
|*value, i| { + const t = @as(f32, @floatFromInt(i)) / 128.0; + const distance = fog_lut_calc_z(1.0 - t, lut_near, lut_far); + const factor = std.math.clamp((safe_end - distance) / (safe_end - fog_state.start), 0.0, 1.0); + value.* = factor; + } + + for (&fog_state.table, 0..) |*raw, i| { + const current = values[i]; + const next = values[i + 1]; + const lut_value = pica.Graphics.TextureCombiners.FogLutValue{ + .value = .ofSaturating(current), + .next_difference = .ofSaturating(next - current), + }; + raw.* = @bitCast(zitrus.hardware.LsbRegister(pica.Graphics.TextureCombiners.FogLutValue).init(lut_value)); + } +} + +fn fog_lut_calc_z(depth: f32, near: f32, far: f32) f32 { + return far * near / (depth * (far - near) + near); +} + +fn flush_memory(memory: mango.DeviceMemory, len: usize) !void { + try device.flushMappedMemoryRanges(&.{.{ + .memory = memory, + .offset = .size(0), + .size = .size(@intCast(len)), + }}); +} + +fn flush_linear_memory(ptr: [*]const u8, len: usize) !void { + if (len == 0) return; + if (c.GSPGPU_FlushDataCache(ptr, @intCast(len)) != 0) return error.Unexpected; +} + +fn mesh_slot(handle: Mesh.Handle) ?*MeshData { + const idx: usize = handle; + if (idx == 0 or idx >= meshes.buffer.len) return null; + if (meshes.buffer[idx]) |*mesh| return mesh; + return null; +} + +fn texture_slot(handle: Texture.Handle) ?*TextureData { + const idx: usize = handle; + if (idx == 0 or idx >= textures.buffer.len) return null; + if (textures.buffer[idx]) |*tex| return tex; + return null; +} + +fn unorm8_scale() f32 { + return 1.0 / 255.0; +} + +fn snorm16_scale() f32 { + return 1.0 / 32767.0; +} + +fn float_to_u8(v: f32) u8 { + return @intFromFloat(std.math.clamp(v, 0.0, 1.0) * 255.0); +} + +fn is_linear_fcram(ptr: [*]const u8, len: usize) bool { + const start = @intFromPtr(ptr); + return in_range(start, len, OS_FCRAM_VADDR, OS_FCRAM_SIZE) or + in_range(start, len, OS_OLD_FCRAM_VADDR, OS_OLD_FCRAM_SIZE); +} + +fn in_range(start: usize, len: usize, base: usize, 
size: usize) bool { + if (start < base) return false; + const offset = start - base; + return offset <= size and len <= size - offset; +} diff --git a/src/platform/3ds/3ds_thread.zig b/src/platform/3ds/3ds_thread.zig new file mode 100644 index 0000000..1231f84 --- /dev/null +++ b/src/platform/3ds/3ds_thread.zig @@ -0,0 +1,84 @@ +//! 3DS thread backend -- wraps libctru's `threadCreate`/`threadJoin`. + +const std = @import("std"); +const c = @import("../nintendo_c.zig").c; +const api = @import("../thread_api.zig"); + +pub const Handle = c.Thread; + +fn priority_to_3ds(p: api.Priority) c_int { + return switch (p) { + .highest => 0x18, + .high => 0x20, + .normal => 0x30, + .low => 0x38, + .lowest => 0x3f, + }; +} + +fn priority_from_3ds(v: c_int) api.Priority { + if (v <= 0x18) return .highest; + if (v <= 0x20) return .high; + if (v <= 0x30) return .normal; + if (v <= 0x38) return .low; + return .lowest; +} + +pub fn spawn(cfg: api.Config, comptime func: anytype, args: anytype) !Handle { + const alloc = cfg.allocator orelse return error.AllocatorRequired; + + const Args = @TypeOf(args); + const Instance = struct { + fn_args: Args, + allocator: std.mem.Allocator, + + fn entry(raw: ?*anyopaque) callconv(.c) void { + const self: *@This() = @ptrCast(@alignCast(raw.?)); + const a = self.allocator; + const Ret = @typeInfo(@TypeOf(func)).@"fn".return_type.?; + + switch (@typeInfo(Ret)) { + .void, .noreturn => @call(.auto, func, self.fn_args), + .error_union => @call(.auto, func, self.fn_args) catch |e| { + std.log.err("aether thread errored: {s}", .{@errorName(e)}); + }, + else => @compileError("thread fn must return void, !void, or noreturn"), + } + a.destroy(self); + } + }; + + const inst = try alloc.create(Instance); + errdefer alloc.destroy(inst); + inst.* = .{ .fn_args = args, .allocator = alloc }; + + const thread = c.threadCreate( + Instance.entry, + inst, + cfg.stack_size, + priority_to_3ds(cfg.priority), + -2, + false, + ) orelse return error.SystemResources; + + 
return thread; +} + +pub fn join(thread: Handle) void { + _ = c.threadJoin(thread, std.math.maxInt(u64)); + c.threadFree(thread); +} + +pub fn set_priority(thread: Handle, p: api.Priority) anyerror!void { + const handle = c.threadGetHandle(thread); + if (c.svcSetThreadPriority(handle, priority_to_3ds(p)) != 0) + return error.SystemResources; +} + +pub fn current_priority() api.Priority { + var p: c.s32 = priority_to_3ds(.normal); + const current = c.threadGetCurrent() orelse return .normal; + const handle = c.threadGetHandle(current); + if (c.svcGetThreadPriority(&p, handle) != 0) return .normal; + return priority_from_3ds(p); +} diff --git a/src/platform/3ds/input.zig b/src/platform/3ds/input.zig new file mode 100644 index 0000000..a93abe7 --- /dev/null +++ b/src/platform/3ds/input.zig @@ -0,0 +1,253 @@ +//! 3DS input backend. Polls libctru HID once per engine update and +//! translates button, circle-pad, C-stick, trigger, touch, and software +//! keyboard state into Aether's core input events. 
+ +const std = @import("std"); +const core = @import("../../core/input/input.zig"); +const surface = @import("surface.zig"); + +const Result = c_int; + +const TouchPosition = extern struct { + px: u16, + py: u16, +}; + +const CirclePosition = extern struct { + dx: i16, + dy: i16, +}; + +extern fn hidInit() Result; +extern fn hidExit() void; +extern fn hidScanInput() void; +extern fn hidKeysHeld() u32; +extern fn hidTouchRead(pos: *TouchPosition) void; +extern fn hidCircleRead(pos: *CirclePosition) void; + +extern fn swkbdInit(swkbd: *anyopaque, typ: c_int, num_buttons: c_int, max_text_length: c_int) void; +extern fn swkbdSetFeatures(swkbd: *anyopaque, features: u32) void; +extern fn swkbdSetHintText(swkbd: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdSetButton(swkbd: *anyopaque, button: c_int, text: [*:0]const u8, submit: bool) void; +extern fn swkbdSetInitialText(swkbd: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdInputText(swkbd: *anyopaque, buf: [*]u8, bufsize: usize) c_int; + +const KEY_A: u32 = 1 << 0; +const KEY_B: u32 = 1 << 1; +const KEY_SELECT: u32 = 1 << 2; +const KEY_START: u32 = 1 << 3; +const KEY_DRIGHT: u32 = 1 << 4; +const KEY_DLEFT: u32 = 1 << 5; +const KEY_DUP: u32 = 1 << 6; +const KEY_DDOWN: u32 = 1 << 7; +const KEY_R: u32 = 1 << 8; +const KEY_L: u32 = 1 << 9; +const KEY_X: u32 = 1 << 10; +const KEY_Y: u32 = 1 << 11; +const KEY_ZL: u32 = 1 << 14; +const KEY_ZR: u32 = 1 << 15; +const KEY_TOUCH: u32 = 1 << 20; +const KEY_CSTICK_RIGHT: u32 = 1 << 24; +const KEY_CSTICK_LEFT: u32 = 1 << 25; +const KEY_CSTICK_UP: u32 = 1 << 26; +const KEY_CSTICK_DOWN: u32 = 1 << 27; + +const CIRCLE_PAD_MAX: f32 = 156.0; +const MAX_TEXT_BYTES: usize = 1024; +const SWKBD_STATE_BYTES: usize = 0x1000; +const SWKBD_TYPE_NORMAL: c_int = 0; +const SWKBD_BUTTON_LEFT: c_int = 0; +const SWKBD_BUTTON_RIGHT: c_int = 2; +const SWKBD_DARKEN_TOP_SCREEN: u32 = 1 << 1; +const SWKBD_MULTILINE: u32 = 1 << 3; +const SWKBD_DEFAULT_QWERTY: u32 = 1 << 9; + +const 
axis_count = @typeInfo(core.Axis).@"enum".fields.len; + +var initialized: bool = false; +var prev_keys: u32 = 0; +var prev_axes: [axis_count]f32 = @splat(0.0); +var prev_touch_down: bool = false; +var prev_touch_pos: core.Vec2 = .{}; + +pub fn setup(_: std.mem.Allocator, _: std.Io) void { + initialized = false; + prev_keys = 0; + prev_axes = @splat(0.0); + prev_touch_down = false; + prev_touch_pos = .{}; +} + +pub fn init() anyerror!void { + if (hidInit() != 0) return error.InputInitFailed; + initialized = true; +} + +pub fn deinit() void { + if (!initialized) return; + if (surface.is_system_closing()) { + initialized = false; + return; + } + hidExit(); + initialized = false; +} + +pub fn pump() void { + hidScanInput(); + const keys = hidKeysHeld(); + + diff_buttons(keys); + pump_axes(keys); + pump_touch(keys); + + prev_keys = keys; + core.signal_frame_boundary(); +} + +pub fn apply_cursor_mode(_: core.CursorMode) void {} + +pub fn begin_text_input_session(target: core.TextInputTarget, options: core.TextInputOptions) anyerror!void { + var state_buf: [SWKBD_STATE_BYTES]u8 align(8) = @splat(0); + const state: *anyopaque = @ptrCast(&state_buf); + + var initial_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); + const initial_len = copy_current_text(&initial_buf); + const initial = initial_buf[0..initial_len :0]; + + var hint_buf: [128:0]u8 = @splat(0); + const hint = copy_z(&hint_buf, target.id); + + const max_text_len = text_limit_c_int(options.max_bytes); + swkbdInit(state, SWKBD_TYPE_NORMAL, 2, max_text_len); + swkbdSetInitialText(state, initial.ptr); + swkbdSetHintText(state, hint.ptr); + swkbdSetButton(state, SWKBD_BUTTON_LEFT, "Cancel", false); + swkbdSetButton(state, SWKBD_BUTTON_RIGHT, "OK", true); + + var features = SWKBD_DARKEN_TOP_SCREEN | SWKBD_DEFAULT_QWERTY; + if (options.multiline) features |= SWKBD_MULTILINE; + swkbdSetFeatures(state, features); + + var out_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); + const out_size = output_buffer_size(options.max_bytes); + const 
button = swkbdInputText(state, out_buf[0..].ptr, out_size); + if (button == SWKBD_BUTTON_RIGHT) { + const len = bounded_z_len(out_buf[0..out_size]); + core.write_text_session_buffer(out_buf[0..len], .submitted); + } else { + core.write_text_session_buffer(initial_buf[0..initial_len], .cancelled); + } +} + +pub fn end_text_input_session() void {} + +fn diff_buttons(keys: u32) void { + const Pair = struct { mask: u32, button: core.Button }; + const map = [_]Pair{ + .{ .mask = KEY_A, .button = .A }, + .{ .mask = KEY_B, .button = .B }, + .{ .mask = KEY_X, .button = .X }, + .{ .mask = KEY_Y, .button = .Y }, + .{ .mask = KEY_L, .button = .LButton }, + .{ .mask = KEY_R, .button = .RButton }, + .{ .mask = KEY_SELECT, .button = .Back }, + .{ .mask = KEY_START, .button = .Start }, + .{ .mask = KEY_DUP, .button = .DpadUp }, + .{ .mask = KEY_DRIGHT, .button = .DpadRight }, + .{ .mask = KEY_DDOWN, .button = .DpadDown }, + .{ .mask = KEY_DLEFT, .button = .DpadLeft }, + }; + + inline for (map) |entry| { + const now = keys & entry.mask != 0; + const prev = prev_keys & entry.mask != 0; + if (now != prev) { + core.deliver_gamepad_button(entry.button, if (now) .pressed else .released); + } + } +} + +fn pump_axes(keys: u32) void { + var circle: CirclePosition = .{ .dx = 0, .dy = 0 }; + hidCircleRead(&circle); + + deliver_axis(.LeftX, normalize_signed(circle.dx, CIRCLE_PAD_MAX)); + deliver_axis(.LeftY, -normalize_signed(circle.dy, CIRCLE_PAD_MAX)); + deliver_axis(.RightX, digital_axis(keys, KEY_CSTICK_RIGHT, KEY_CSTICK_LEFT)); + deliver_axis(.RightY, digital_axis(keys, KEY_CSTICK_DOWN, KEY_CSTICK_UP)); + deliver_axis(.LeftTrigger, if (keys & KEY_ZL != 0) 1.0 else 0.0); + deliver_axis(.RightTrigger, if (keys & KEY_ZR != 0) 1.0 else 0.0); +} + +fn pump_touch(keys: u32) void { + const touch_down = keys & KEY_TOUCH != 0; + if (touch_down) { + var touch: TouchPosition = .{ .px = 0, .py = 0 }; + hidTouchRead(&touch); + const pos: core.Vec2 = .{ + .x = @floatFromInt(touch.px), + .y = 
@floatFromInt(touch.py), + }; + const delta: core.Vec2 = if (prev_touch_down) + .{ .x = pos.x - prev_touch_pos.x, .y = pos.y - prev_touch_pos.y } + else + .{}; + + core.deliver_mouse_move(pos, delta); + if (!prev_touch_down) core.deliver_mouse_button(.Left, .pressed, pos); + prev_touch_pos = pos; + } else if (prev_touch_down) { + core.deliver_mouse_button(.Left, .released, prev_touch_pos); + } + + prev_touch_down = touch_down; +} + +fn deliver_axis(axis: core.Axis, value: f32) void { + const idx = @intFromEnum(axis); + const prev = prev_axes[idx]; + if (value != 0.0 or prev != 0.0) core.deliver_gamepad_axis(axis, value); + prev_axes[idx] = value; +} + +fn normalize_signed(raw: anytype, max_value: f32) f32 { + const value = @as(f32, @floatFromInt(raw)) / max_value; + return std.math.clamp(value, -1.0, 1.0); +} + +fn digital_axis(keys: u32, positive_mask: u32, negative_mask: u32) f32 { + var value: f32 = 0.0; + if (keys & positive_mask != 0) value += 1.0; + if (keys & negative_mask != 0) value -= 1.0; + return value; +} + +fn output_buffer_size(limit: ?usize) usize { + const max = @min(limit orelse (MAX_TEXT_BYTES - 1), MAX_TEXT_BYTES - 1); + return max + 1; +} + +fn text_limit_c_int(limit: ?usize) c_int { + const max = @min(limit orelse (MAX_TEXT_BYTES - 1), MAX_TEXT_BYTES - 1); + return @intCast(@max(max, 1)); +} + +fn copy_current_text(dst: []u8) usize { + const s = core.current_text_session() orelse return 0; + const n = @min(dst.len - 1, s.buffer.items.len); + @memcpy(dst[0..n], s.buffer.items[0..n]); + dst[n] = 0; + return n; +} + +fn copy_z(dst: []u8, text: []const u8) [:0]const u8 { + const n = @min(dst.len - 1, text.len); + @memcpy(dst[0..n], text[0..n]); + dst[n] = 0; + return dst[0..n :0]; +} + +fn bounded_z_len(buf: []const u8) usize { + return std.mem.indexOfScalar(u8, buf, 0) orelse buf.len; +} diff --git a/src/platform/3ds/mango/fmt.zig b/src/platform/3ds/mango/fmt.zig new file mode 100644 index 0000000..5dfd04f --- /dev/null +++ 
b/src/platform/3ds/mango/fmt.zig @@ -0,0 +1,2 @@ +pub const zpsh = @import("fmt/zpsh.zig"); + diff --git a/src/platform/3ds/mango/fmt/zpsh.zig b/src/platform/3ds/mango/fmt/zpsh.zig new file mode 100644 index 0000000..0b41066 --- /dev/null +++ b/src/platform/3ds/mango/fmt/zpsh.zig @@ -0,0 +1,430 @@ +//! Zitrus PICA200 shader +//! +//! A simple shader format which omits the need for positional reads and has an overall simpler structure. +//! It omits numerous things that are not used or cannot be used by zitrus. +//! +//! Even if things are tightly packed, all sections are aligned to 32-bits. + +pub const magic = "ZPSH"; + +pub const Header = extern struct { + pub const Shader = packed struct(u32) { + entrypoints: u12, + instructions_minus_one: u12, + descriptors: u8, + + pub fn init(entrypoints: usize, instructions_size: usize, descriptors: usize) Shader { + return .{ + .entrypoints = @intCast(entrypoints), + .instructions_minus_one = @intCast(instructions_size - 1), + .descriptors = @intCast(descriptors), + }; + } + + pub fn instructions(size: Shader) usize { + return @as(usize, size.instructions_minus_one) + 1; + } + }; + + pub const Flags = packed struct(u8) { _: u8 = 0 }; + + magic: [magic.len]u8 = magic.*, + shader: Shader, + /// In `u32`s + entry_string_table_size: u16, + flags: Flags = .{}, + /// In `u32`s + header_size: u8 = @divExact(@sizeOf(Header), @sizeOf(u32)), + /// A xxHash32 hash of instructions and operand descriptors, in the described order. 
+ /// Seed is 67 + code_hash: u32, + + pub const CheckError = error{ NotZpsh, InvalidZpsh }; + + pub fn check(hdr: Header) CheckError!void { + if (!std.mem.eql(u8, &hdr.magic, magic)) return error.NotZpsh; + } +}; + +pub const EntrypointHeader = extern struct { + pub const Flags = packed struct(u16) { + _: u16 = 0, + }; + + pub const ShaderInfo = packed struct(u16) { + pub const vertex: ShaderInfo = .{ .type = .vertex }; + pub const Type = enum(u2) { vertex, geometry_point, geometry_variable, geometry_fixed }; + + pub const Geometry = packed union(u14) { + pub const empty: Geometry = .{ .point = std.mem.zeroes(Geometry.Point) }; + + pub const Point = packed struct(u14) { + inputs_minus_one: u4, + _: u10 = 0, + }; + + pub const Variable = packed struct(u14) { + full_vertices: u5, + _: u9 = 0, + }; + + pub const Fixed = packed struct(u14) { + vertices_minus_one: u4, + uniform_start: FloatingRegister, + _: u3 = 0, + }; + + point: Point, + fixed: Fixed, + variable: Variable, + + pub fn initPoint(inputs: u5) Geometry { + return .{ .point = .{ .inputs_minus_one = @intCast(inputs - 1) } }; + } + + pub fn initVariable(full_vertices: u5) Geometry { + return .{ .variable = .{ .full_vertices = full_vertices } }; + } + + pub fn initFixed(vertices: u5, uniform_start: FloatingRegister) Geometry { + return .{ .fixed = .{ .vertices_minus_one = @intCast(vertices - 1), .uniform_start = uniform_start } }; + } + }; + + type: Type, + geometry: Geometry = .empty, + }; + + pub const BooleanConstantMask = packed struct(u16) { + // zig fmt: off + b0: bool, b1: bool, b2: bool, b3: bool, b4: bool, b5: bool, b6: bool, b7: bool, + b8: bool, b9: bool, b10: bool, b11: bool, b12: bool, b13: bool, b14: bool, b15: bool, + // zig fmt: on + + pub fn fromSet(set: std.EnumSet(BooleanRegister)) BooleanConstantMask { + var mask: BooleanConstantMask = std.mem.zeroes(BooleanConstantMask); + + for (std.enums.values(BooleanRegister)) |b| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(b), 
@intFromBool(set.contains(b)), .little); + } + + return mask; + } + + pub fn toSet(mask: BooleanConstantMask) std.EnumSet(BooleanRegister) { + var set: std.EnumSet(BooleanRegister) = .initEmpty(); + + for (std.enums.values(BooleanRegister)) |b| { + set.setPresent(b, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(b), .little) != 0); + } + + return set; + } + }; + + pub const IntegerConstantMask = packed struct(u16) { + // zig fmt: off + i0: bool, i1: bool, + i2: bool, i3: bool, + // zig fmt: on + _: u12, + + pub fn fromSet(set: std.EnumSet(IntegerRegister)) IntegerConstantMask { + var mask: IntegerConstantMask = std.mem.zeroes(IntegerConstantMask); + + for (std.enums.values(IntegerRegister)) |i| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(i), @intFromBool(set.contains(i)), .little); + } + + return mask; + } + + pub fn toSet(mask: IntegerConstantMask) std.EnumSet(IntegerRegister) { + var set: std.EnumSet(IntegerRegister) = .initEmpty(); + + for (std.enums.values(IntegerRegister)) |i| { + set.setPresent(i, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(i), .little) != 0); + } + + return set; + } + }; + + pub const FloatingConstantMask = extern struct { + // zig fmt: off + pub const Low = packed struct(u32) { + f0: bool, f1: bool, f2: bool, f3: bool, f4: bool, f5: bool, f6: bool, f7: bool, + f8: bool, f9: bool, f10: bool, f11: bool, f12: bool, f13: bool, f14: bool, f15: bool, + f16: bool, f17: bool, f18: bool, f19: bool, f20: bool, f21: bool, f22: bool, f23: bool, + f24: bool, f25: bool, f26: bool, f27: bool, f28: bool, f29: bool, f30: bool, f31: bool, + }; + + pub const Mid = packed struct(u32) { + f32: bool, f33: bool, f34: bool, f35: bool, f36: bool, f37: bool, f38: bool, f39: bool, + f40: bool, f41: bool, f42: bool, f43: bool, f44: bool, f45: bool, f46: bool, f47: bool, + f48: bool, f49: bool, f50: bool, f51: bool, f52: bool, f53: bool, f54: bool, f55: bool, + f56: bool, f57: bool, f58: bool, f59: bool, f60: 
bool, f61: bool, f62: bool, f63: bool, + }; + + pub const High = packed struct(u32) { + f64: bool, f65: bool, f66: bool, f67: bool, f68: bool, f69: bool, f70: bool, f71: bool, + f72: bool, f73: bool, f74: bool, f75: bool, f76: bool, f77: bool, f78: bool, f79: bool, + f80: bool, f81: bool, f82: bool, f83: bool, f84: bool, f85: bool, f86: bool, f87: bool, + f88: bool, f89: bool, f90: bool, f91: bool, f92: bool, f93: bool, f94: bool, f95: bool, + }; + // zig fmt: on + + low: Low, + mid: Mid, + high: High, + + pub fn fromSet(set: std.EnumSet(FloatingRegister)) FloatingConstantMask { + var mask: FloatingConstantMask = std.mem.zeroes(FloatingConstantMask); + + for (std.enums.values(FloatingRegister)) |f| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(f), @intFromBool(set.contains(f)), .little); + } + + return mask; + } + + pub fn toSet(mask: FloatingConstantMask) std.EnumSet(FloatingRegister) { + var set: std.EnumSet(FloatingRegister) = .initEmpty(); + + for (std.enums.values(FloatingRegister)) |f| { + set.setPresent(f, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(f), .little) != 0); + } + + return set; + } + }; + + pub const OutputMask = packed struct(u32) { + // zig fmt: off + o0: bool, o1: bool, o2: bool, o3: bool, o4: bool, o5: bool, o6: bool, o7: bool, + o8: bool, o9: bool, o10: bool, o11: bool, o12: bool, o13: bool, o14: bool, o15: bool, + _: u16 = 0, + // zig fmt: on + + pub fn fromSet(set: std.EnumSet(OutputRegister)) OutputMask { + var mask: OutputMask = std.mem.zeroes(OutputMask); + + for (std.enums.values(OutputRegister)) |o| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(o), @intFromBool(set.contains(o)), .little); + } + + return mask; + } + + pub fn toSet(mask: OutputMask) std.EnumSet(OutputRegister) { + var set: std.EnumSet(OutputRegister) = .initEmpty(); + + for (std.enums.values(OutputRegister)) |o| { + set.setPresent(o, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(o), .little) 
!= 0); + } + + return set; + } + }; + + name_string_offset: u32, + instruction_offset: u16, + info: ShaderInfo, + flags: Flags, + header_size: u16 = @divExact(@sizeOf(EntrypointHeader), @sizeOf(u32)), + + // NOTE: Constants are sorted, that is, e.g: f0 = true, f1 = false, f2 = true then in memory there will be two floating constant entries that correspond to f0 and f2. Same for integers and same for outputs. + boolean_constant_mask: BooleanConstantMask, + integer_constant_mask: IntegerConstantMask, + floating_constant_mask: FloatingConstantMask, + output_mask: OutputMask, +}; + +pub const Parsed = struct { + code_hash: u32, + instructions: []const shader.encoding.Instruction, + operand_descriptors: []const shader.encoding.OperandDescriptor, + string_table: []const u8, + entrypoint_offsets: []const u8, + entrypoint_data: []const u8, + entrypoints: usize, + + pub fn initBuffer(buffer: []const u8) Header.CheckError!Parsed { + const header = try checkedSlice(buffer, 0, @sizeOf(Header)); + if (!std.mem.eql(u8, header[0..magic.len], magic)) return error.NotZpsh; + + const shader_word = try readLittle(u32, header, 4); + const entrypoints: usize = @intCast(shader_word & 0xfff); + const instructions_minus_one: u16 = @intCast((shader_word >> 12) & 0xfff); + const descriptors: u8 = @intCast(shader_word >> 24); + const instructions_count = @as(usize, instructions_minus_one) + 1; + const entry_string_table_size = try readLittle(u16, header, 8); + const header_size_words = try readLittle(u8, header, 11); + const code_hash = try readLittle(u32, header, 12); + + const header_size = try checkedMul(@as(usize, header_size_words), @sizeOf(u32)); + if (header_size < @sizeOf(Header)) return error.InvalidZpsh; + const entrypoint_offsets_start = header_size; + const entrypoint_offsets_size = try checkedMul(@as(usize, entrypoints), @sizeOf(u32)); + const code_start = try checkedAdd(entrypoint_offsets_start, entrypoint_offsets_size); + const code_size = try 
checkedMul(@sizeOf(shader.encoding.Instruction), instructions_count); + const operands_start = try checkedAdd(code_start, code_size); + const operands_size = try checkedMul(@sizeOf(shader.encoding.OperandDescriptor), @as(usize, descriptors)); + const string_table_start = try checkedAdd(operands_start, operands_size); + const string_table_size = try checkedMul(@as(usize, entry_string_table_size), @sizeOf(u32)); + const entrypoints_start = try checkedAdd(string_table_start, string_table_size); + + if (entrypoints_start > buffer.len) return error.InvalidZpsh; + + return .{ + .code_hash = code_hash, + .instructions = @alignCast(std.mem.bytesAsSlice(pica.shader.encoding.Instruction, try checkedSlice(buffer, code_start, code_size))), + .operand_descriptors = @alignCast(std.mem.bytesAsSlice(pica.shader.encoding.OperandDescriptor, try checkedSlice(buffer, operands_start, operands_size))), + .string_table = try checkedSlice(buffer, string_table_start, string_table_size), + .entrypoint_offsets = try checkedSlice(buffer, entrypoint_offsets_start, entrypoint_offsets_size), + .entrypoint_data = buffer[entrypoints_start..], + .entrypoints = entrypoints, + }; + } + + pub fn iterator(parsed: *const Parsed) EntrypointIterator { + return .{ + .parsed = parsed, + .offset_cursor = 0, + }; + } + + // TODO: This assumes a proper ZPSH (as we're the only ones who currently use them we're allowed to not care :p) + pub const EntrypointIterator = struct { + pub const Entry = struct { + info: EntrypointHeader.ShaderInfo, + offset: u16, + + name: [:0]const u8, + boolean_constant_set: std.enums.EnumSet(BooleanRegister), + integer_constant_set: std.enums.EnumSet(IntegerRegister), + floating_constant_set: std.enums.EnumSet(FloatingRegister), + output_set: std.enums.EnumSet(OutputRegister), + + integer_constants: []const [4]u8, + floating_constants: []const pica.F7_16x4, + output_map: []const pica.OutputMap, + }; + + parsed: *const Parsed, + offset_cursor: usize, + + pub fn next(it: 
*EntrypointIterator) Header.CheckError!?Entry { + if (it.offset_cursor >= it.parsed.entrypoint_offsets.len) return null; + + const offset = try readLittle(u32, it.parsed.entrypoint_offsets, it.offset_cursor); + it.offset_cursor +%= @sizeOf(u32); + + const entry_offset: usize = @intCast(offset); + if (entry_offset > it.parsed.entrypoint_data.len) return error.InvalidZpsh; + const entry_start = it.parsed.entrypoint_data[entry_offset..]; + _ = try checkedSlice(entry_start, 0, @sizeOf(EntrypointHeader)); + + const name_string_offset = try readLittle(u32, entry_start, 0); + const instruction_offset = try readLittle(u16, entry_start, 4); + const info: EntrypointHeader.ShaderInfo = @bitCast(try readLittle(u16, entry_start, 6)); + const boolean_constant_set = enumSetFromMask(BooleanRegister, u16, try readLittle(u16, entry_start, 12)); + const integer_constant_set = enumSetFromMask(IntegerRegister, u16, try readLittle(u16, entry_start, 14)); + const floating_constant_set = floatingSetFromMask( + try readLittle(u32, entry_start, 16), + try readLittle(u32, entry_start, 20), + try readLittle(u32, entry_start, 24), + ); + const output_map_set = enumSetFromMask(OutputRegister, u32, try readLittle(u32, entry_start, 28)); + + const integer_constants_byte_size = try checkedMul(integer_constant_set.count(), @sizeOf([4]u8)); + const floating_constants_byte_size = try checkedMul(floating_constant_set.count(), @sizeOf(pica.F7_16x4)); + const output_map_byte_size = try checkedMul(output_map_set.count(), @sizeOf(pica.OutputMap)); + const integer_constants_start = @sizeOf(EntrypointHeader); + const floating_constants_start = try checkedAdd(integer_constants_start, integer_constants_byte_size); + const output_map_start = try checkedAdd(floating_constants_start, floating_constants_byte_size); + + if (instruction_offset > std.math.maxInt(u12)) return error.InvalidZpsh; + if (name_string_offset > it.parsed.string_table.len) return error.InvalidZpsh; + const name_tail = 
it.parsed.string_table[name_string_offset..]; + const name_end = std.mem.indexOfScalar(u8, name_tail, 0) orelse return error.InvalidZpsh; + + return .{ + .info = info, + .offset = @intCast(instruction_offset), + + .name = name_tail[0..name_end :0], + .boolean_constant_set = boolean_constant_set, + .integer_constant_set = integer_constant_set, + .floating_constant_set = floating_constant_set, + .output_set = output_map_set, + + .integer_constants = @alignCast(std.mem.bytesAsSlice([4]u8, try checkedSlice(entry_start, integer_constants_start, integer_constants_byte_size))), + .floating_constants = @alignCast(std.mem.bytesAsSlice(pica.F7_16x4, try checkedSlice(entry_start, floating_constants_start, floating_constants_byte_size))), + .output_map = @alignCast(std.mem.bytesAsSlice(pica.OutputMap, try checkedSlice(entry_start, output_map_start, output_map_byte_size))), + }; + } + }; +}; + +fn checkedAdd(a: usize, b: usize) Header.CheckError!usize { + if (a > std.math.maxInt(usize) - b) return error.InvalidZpsh; + return a + b; +} + +fn checkedMul(a: usize, b: usize) Header.CheckError!usize { + if (b != 0 and a > std.math.maxInt(usize) / b) return error.InvalidZpsh; + return a * b; +} + +fn checkedSlice(buffer: []const u8, start: usize, len: usize) Header.CheckError![]const u8 { + const end = try checkedAdd(start, len); + if (end > buffer.len) return error.InvalidZpsh; + return buffer[start..end]; +} + +fn readLittle(comptime T: type, buffer: []const u8, offset: usize) Header.CheckError!T { + const bytes = try checkedSlice(buffer, offset, @sizeOf(T)); + return std.mem.readInt(T, bytes[0..@sizeOf(T)], .little); +} + +fn enumSetFromMask(comptime E: type, comptime T: type, mask: T) std.EnumSet(E) { + var set: std.EnumSet(E) = .initEmpty(); + for (std.enums.values(E)) |value| { + const bit: std.math.Log2Int(T) = @intCast(@intFromEnum(value)); + set.setPresent(value, ((mask >> bit) & 1) != 0); + } + return set; +} + +fn floatingSetFromMask(low: u32, mid: u32, high: u32) 
std.EnumSet(FloatingRegister) { + var set: std.EnumSet(FloatingRegister) = .initEmpty(); + for (std.enums.values(FloatingRegister)) |value| { + const index = @intFromEnum(value); + const word = switch (index / 32) { + 0 => low, + 1 => mid, + 2 => high, + else => unreachable, + }; + const bit: u5 = @intCast(index & 31); + set.setPresent(value, ((word >> bit) & 1) != 0); + } + return set; +} + +comptime { + std.debug.assert(std.mem.isAligned(@sizeOf(Header), @sizeOf(u32))); + std.debug.assert(std.mem.isAligned(@sizeOf(EntrypointHeader), @sizeOf(u32))); +} + +const builtin = @import("builtin"); +const std = @import("std"); +const zitrus = @import("zitrus"); +const pica = zitrus.hardware.pica; +const shader = pica.shader; + +const BooleanRegister = shader.register.Integral.Boolean; +const IntegerRegister = shader.register.Integral.Integer; +const FloatingRegister = shader.register.Source.Constant; +const OutputRegister = shader.register.Destination.Output; diff --git a/src/platform/3ds/mango/hardware.zig b/src/platform/3ds/mango/hardware.zig new file mode 100644 index 0000000..9613ef4 --- /dev/null +++ b/src/platform/3ds/mango/hardware.zig @@ -0,0 +1,201 @@ +//! Definitions for 3DS hardware + +pub const cpu = @import("hardware/cpu.zig"); +pub const pica = @import("hardware/pica.zig"); +pub const csnd = @import("hardware/csnd.zig"); +pub const pxi = @import("hardware/pxi.zig"); +pub const dsp = @import("hardware/dsp.zig"); +pub const hid = @import("hardware/hid.zig"); +pub const lgy = @import("hardware/lgy.zig"); +pub const i2c = @import("hardware/i2c.zig"); +pub const lcd = @import("hardware/lcd.zig"); + +/// Represents a register which is triggered by writing a value to it. +pub const Trigger = enum(u1) { trigger = 1 }; + +/// Represents an `AlignedPhysicalAddress` with no alignment. 
+pub const PhysicalAddress = AlignedPhysicalAddress(.@"1", .@"1"); + +/// Represents a `PhysicalAddress` which is aligned to `address_alignment` and stored with `address_shift` +pub fn AlignedPhysicalAddress(comptime address_alignment: std.mem.Alignment, comptime address_shift: std.mem.Alignment) type { + std.debug.assert(address_alignment.order(address_shift) != .lt); + + return enum(u32) { + zero = 0x00, + _, + + const AlignedPhysAddr = @This(); + pub const alignment = address_alignment; + pub const shift = address_shift; + + pub inline fn fromAddress(address: u32) AlignedPhysAddr { + return .fromPhysical(@as(PhysicalAddress, @enumFromInt(address))); + } + + pub fn fromPhysical(aligned_address: anytype) AlignedPhysAddr { + const OtherAlignedPhysAddr = @TypeOf(aligned_address); + + if (@typeInfo(OtherAlignedPhysAddr) != .@"enum" or !@hasDecl(OtherAlignedPhysAddr, "alignment") or !@hasDecl(OtherAlignedPhysAddr, "shift")) + @compileError("please provide a valid AlignedPhysicalAddress to .of()"); + + const other_alignment = @field(OtherAlignedPhysAddr, "alignment"); + const other_shift = @field(OtherAlignedPhysAddr, "shift"); + + if (@TypeOf(other_alignment) != std.mem.Alignment or @TypeOf(other_shift) != std.mem.Alignment or OtherAlignedPhysAddr != AlignedPhysicalAddress(other_alignment, other_shift)) + @compileError("please provide a valid AlignedPhysicalAddress to .of()"); + + const address = @intFromEnum(aligned_address) << @intCast(std.math.log2(other_shift.toByteUnits())); + + if (alignment.order(other_alignment) != .lt) { + std.debug.assert(alignment.check(address)); + } + + return @enumFromInt(address >> @intCast(std.math.log2(shift.toByteUnits()))); + } + }; +} + +/// Represents a a register which only spans the LSb of a word, leaving the others unused. 
+pub fn LsbRegister(comptime T: type) type { + std.debug.assert(@bitSizeOf(T) < @bitSizeOf(u32)); + + return packed struct(u32) { + const Lsb = @This(); + + value: T, + _: std.meta.Int(.unsigned, @bitSizeOf(u32) - @bitSizeOf(T)) = 0, + + pub inline fn init(value: T) Lsb { + return .{ .value = value }; + } + + pub fn format(lsb: Lsb, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{lsb.value}); + } + }; +} + +/// Represents a a register which only spans the MSb of a word, leaving the others unused. +pub fn MsbRegister(comptime T: type) type { + std.debug.assert(@bitSizeOf(T) < @bitSizeOf(u32)); + + return packed struct(u32) { + const Msb = @This(); + + _: std.meta.Int(.unsigned, @bitSizeOf(u32) - @bitSizeOf(T)) = 0, + value: T, + + pub inline fn init(value: T) Msb { + return .{ .value = value }; + } + + pub fn format(msb: Msb, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{msb.value}); + } + }; +} + +/// Represents a bitpacked array of `n` elements of `T`. +/// Stored in native endian. +/// +/// A `BitpackedArray` is stored from LSb (0) to MSb (n - 1). +pub fn BitpackedArray(comptime T: type, comptime n: usize) type { + const total_bit_size = @bitSizeOf(T) * n; + const ArrayInt = @Int(.unsigned, total_bit_size); + const ElementInt = @Int(.unsigned, @bitSizeOf(T)); + + return packed struct(ArrayInt) { + pub const Int = ArrayInt; + + raw: ArrayInt, + + pub inline fn init(value: [n]T) Self { + // NOTE: Cannot be `undefined`, any `undefined` bits make the entire value `undefined`. + var bt: Self = std.mem.zeroes(Self); + inline for (0..n) |i| bt.set(i, value[i]); + return bt; + } + + pub inline fn splat(value: T) Self { + // NOTE: Cannot be `undefined`, any `undefined` bits make the entire value `undefined`. 
+ var bt: Self = std.mem.zeroes(Self); + inline for (0..n) |i| bt.set(i, value); + return bt; + } + + pub inline fn slice(bt: Self, index: usize, comptime len: usize) BitpackedArray(T, len) { + std.debug.assert(index + len <= n); + + const NewBitpacked = BitpackedArray(T, len); + const bt_int: Int = @bitCast(bt); + const new_bt_int: NewBitpacked.Int = @truncate(bt_int >> (index * @bitSizeOf(T))); + return @bitCast(new_bt_int); + } + + pub inline fn get(bt: Self, index: usize) T { + const value = std.mem.readPackedInt(ElementInt, @ptrCast(&bt.raw), index * @bitSizeOf(ElementInt), .native); + + return switch (@typeInfo(T)) { + .@"enum" => @enumFromInt(value), + else => @bitCast(value), + }; + } + + pub inline fn copyWith(bt: Self, comptime index: usize, value: T) Self { + var new_bt: Self = bt; + new_bt.set(index, value); + return new_bt; + } + + pub inline fn set(bt: *Self, index: usize, value: T) void { + std.mem.writePackedInt(ElementInt, @ptrCast(&bt.raw), index * @bitSizeOf(ElementInt), switch (@typeInfo(T)) { + .@"enum" => @intFromEnum(value), + else => @bitCast(value), + }, .native); + } + + pub fn format(bt: Self, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.writeAll("{ "); + for (0..n) |i| { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{bt.get(i)}); + if (i != n - 1) try w.writeAll(", "); + } + try w.writeAll(" }"); + } + + const Self = @This(); + }; +} + +comptime { + _ = pica; + _ = csnd; + _ = pxi; + _ = dsp; + _ = lgy; + _ = i2c; +} + +const testing = std.testing; + +test BitpackedArray { + const Thing = enum(u1) { foo, bar }; + const ThingArray = BitpackedArray(Thing, 4); + + var bt: ThingArray = .splat(.foo); + + bt.set(3, .bar); + + try testing.expect(bt.get(3) == .bar); + + bt.set(3, .foo); + + try testing.expect(bt.get(3) == .foo); + + bt.set(0, .bar); + + try testing.expect(bt.get(0) == .bar); +} + +const builtin = @import("builtin"); +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/cpu.zig 
b/src/platform/3ds/mango/hardware/cpu.zig new file mode 100644 index 0000000..32ea21a --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu.zig @@ -0,0 +1,194 @@ +//! Definitions for ARM instructions and MMIO registers +//! which are common to both CPUs. +//! +//! See `arm9` and `arm11` for cpu-specific things. +//! +//! Based on the technical reference manuals of both. + +// TODO: Not tested + +pub const arm9 = @import("cpu/arm9.zig"); +pub const arm11 = @import("cpu/arm11.zig"); + +pub const Register = enum(u4) { + pub const sp: Register = .r13; + pub const lr: Register = .r14; + pub const pc: Register = .r15; + + r0, + r1, + r2, + r3, + r4, + r5, + r6, + r7, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, +}; + +// CP15 c0 c0 0 -> ID +// CP15 c0 c0 1 -> Cache Type + +pub inline fn waitForInterrupt() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c0, 4" + : + : [sbz] "r" (0), + ); +} + +pub const cache = struct { + pub const SetWay = packed struct(u32) { + _reserved0: u4 = 0, + /// Depends on cache size + set: u13, + _reserved1: u13, + way: u2, + + pub inline fn invalidateInstruction(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c5, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn invalidateData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c6, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn cleanData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c10, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn flushData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c14, 2" + : + : [sw] "r" (set_way), + ); + } + }; + + pub const Address = packed struct(u32) { + virtual: u32, + + pub inline fn invalidateInstruction(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c5, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn invalidateData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c6, 1" + : + : [addr] "r" (address), + ); + } 
+ + pub inline fn cleanData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c10, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn flushData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c14, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn flushBranchPredictor(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c5, 7" + : + : [addr] "r" (address), + ); + } + }; + + pub inline fn flushPrefetchBuffer() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 4" + : + : [sbz] "r" (0), + ); + } + + pub inline fn dataSynchronizationBarrier() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 4" + : + : [sbz] "r" (0), + ); + } + + pub inline fn dataMemoryBarrier() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 5" + : + : [sbz] "r" (0), + ); + } + + pub inline fn flushBranchPredictor() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 6" + : + : [sbz] "r" (0), + ); + } + + /// Also flushes branch predictor cache + pub inline fn invalidateInstruction() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn invalidateData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c6, 0" + : + : [sbz] "r" (0), + ); + } + + /// Also flushes branch predictor cache + pub inline fn invalidate() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c7, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn cleanData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn flushData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c14, 0" + : + : [sbz] "r" (0), + ); + } +}; + +comptime { + _ = arm11; + _ = arm9; +} + +const std = @import("std"); +const zitrus = @import("zitrus"); diff --git a/src/platform/3ds/mango/hardware/cpu/arm11.zig b/src/platform/3ds/mango/hardware/cpu/arm11.zig new file mode 100644 index 0000000..4393460 --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu/arm11.zig @@ -0,0 +1,324 @@ +//! 
Based on the ARM11 MPCore r2p0 Technical Reference Manual + +// CP15 c0 c0 3 -> TLB Type +// CP15 c0 c0 5 -> CPUID +// CP15 c0 c1 -> Feature Registers +// CP15 c0 c2 -> ISA Attributes Registers + +pub const Control = packed struct(u32) { + pub const Auxiliary = packed struct(u32) { + pub const Multiprocessing = enum(u1) { amp, smp }; + + return_stack: bool = false, + dynamic_branch_prediction: bool = false, + static_branch_prediction: bool = false, + instruction_folding: bool = false, + exclusive_caches: bool = false, + multiprocessing: Multiprocessing, + l1_parity_errors: bool = false, + _reserved0: u25 = 0, + + pub inline fn read() Auxiliary { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 1" + : [cnt] "=r" (-> Auxiliary), + ); + } + + pub inline fn write(cnt: Auxiliary) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 1" + : + : [cnt] "r" (cnt), + ); + } + }; + + mmu: bool = false, + /// Data abort on unaligned loads/stores + strict_alignment: bool = false, + l1_data_cache: bool = false, + _reserved0: u4 = std.math.maxInt(u4), + _reserved1: u1 = 0, + /// Deprecated + system_protection: bool = false, + /// Deprecated + rom_protection: bool = false, + _reserved2: u1 = 0, + branch_prediction: bool = false, + l1_instruction_cache: bool = false, + alternate_exception_vectors: bool = false, + _reserved3: u1 = 1, + disable_thumb_by_pc_loads: bool = false, + _unused0: u6 = 0, + unaligned_access: bool = false, + subpage_access_permissions: bool = false, + _reserved4: u1 = 0, + set_cpsr_e_on_exception: bool = false, + _reserved5: u1 = 0, + non_maskable_fast_irq: bool = false, + tex_remap: bool = false, + force_access_permissions: bool = false, + _reserved6: u2 = 0, + + pub inline fn read() Control { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 0" + : [cnt] "=r" (-> Control), + ); + } + + pub inline fn write(cnt: Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 0" + : + : [cnt] "r" (cnt), + ); + } +}; + +pub const CoprocessorAccess 
= packed struct(u32) { + pub const Mode = enum(u2) { denied, supervisor, full = 3 }; + + _reserved0: u20 = 0, + @"10": Mode = .denied, + @"11": Mode = .denied, + _reserved1: u8 = 0, + + pub inline fn read() CoprocessorAccess { + return asm volatile ("mrc p15, 0, %[acc], c1, c0, 2" + : [acc] "=r" (-> CoprocessorAccess), + ); + } + + pub inline fn write(acc: CoprocessorAccess) void { + return asm volatile ("mcr p15, 0, %[acc], c1, c0, 2" + : + : [acc] "r" (acc), + ); + } +}; + +pub const TranslationTable = extern struct { + pub const Cachable = enum(u2) { + none, + write_back_allocate, + write_through, + write_back, + }; + + pub const Base = packed struct(u32) { + _reserved0: u1 = 0, + shared: bool = false, + _reserved1: u1 = 0, + region: Cachable = .none, + /// TTBL 0 base depends on `Control.separate_table_boundary` and TTBL 1 is restricted to 16KB pages + base: u27, + + pub inline fn read(comptime table: u1) Base { + return asm volatile ("mrc p15, 0, %[base], c2, c0, %[reg]" + : [base] "=r" (-> Base), + : [reg] "i" (table), + ); + } + + pub inline fn write(base: Base, comptime table: u1) void { + return asm volatile ("mcr p15, 0, %[base], c2, c0, %[reg]" + : + : [base] "r" (base), + [reg] "i" (table), + ); + } + }; + + pub const Control = packed struct(u32) { + pub const Boundary = enum(u3) { + @"16KB", + @"8KB", + @"4KB", + @"2KB", + @"1KB", + @"512B", + @"256B", + @"128B", + }; + + separate_table_boundary: Boundary, + _reserved0: u29 = 0, + + pub inline fn read() TranslationTable.Control { + return asm volatile ("mrc p15, 0, %[cnt], c2, c0, 2" + : [cnt] "=r" (-> TranslationTable.Control), + ); + } + + pub inline fn write(cnt: TranslationTable.Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c2, c0, 2" + : + : [cnt] "r" (cnt), + ); + } + }; +}; + +pub const DomainAccess = packed struct(u32) { + pub const Mode = enum(u2) { none, client, manager = 3 }; + + access: BitpackedArray(Mode, 16), + + pub inline fn read() DomainAccess { + return asm volatile 
("mrc p15, 0, %[acc], c3, c0, 0"
+            : [acc] "=r" (-> DomainAccess),
+        );
+    }
+
+    pub inline fn write(acc: DomainAccess) void {
+        return asm volatile ("mcr p15, 0, %[acc], c3, c0, 0"
+            :
+            : [acc] "r" (acc),
+        );
+    }
+};
+
+pub const Fault = packed struct(u32) { // Fault status word, moved to/from CP15 c5 by `read`/`write`.
+    pub const Kind = enum(u1) { data, instruction }; // Passed as the last (opcode2) operand of the CP15 access.
+    pub const Operation = enum(u1) { read, write };
+    pub const Status = enum(u5) {
+        alignment = 0b00001,
+        instruction_cache_maintenance = 0b00100,
+        first_level_external_abort = 0b01100,
+        second_level_external_abort = 0b01110,
+        section_translation = 0b00101,
+        page_translation = 0b00111,
+        section_access = 0b00011,
+        page_access = 0b00110,
+        section_domain = 0b01001,
+        page_domain = 0b01011,
+        section_permission = 0b01101,
+        page_permission = 0b01111,
+        precise_external_abort = 0b01000,
+        imprecise_external_abort = 0b10110,
+        debug = 0b00010,
+        _,
+    };
+
+    status_lo: u4, // Low four bits of the `Status` code.
+    domain: u4,
+    _reserved0: u2 = 0,
+    status_hi: u1, // Fifth bit of the `Status` code; combined with `status_lo` by `status`.
+    operation: Operation,
+    external_abort: bool,
+    _reserved1: u19,
+
+    pub fn status(fault: Fault) Status { // Joins `status_hi` (bit 4) with `status_lo` (bits 0-3).
+        return @enumFromInt(fault.status_lo | (@as(u5, fault.status_hi) << 4));
+    }
+
+    pub inline fn read(comptime kind: Kind) Fault { // Reads the status register selected by `kind`.
+        return asm volatile ("mrc p15, 0, %[st], c5, c0, %[kind]"
+            : [st] "=r" (-> Fault), // The output type has to agree with the declared return type.
+            : [kind] "i" (@intFromEnum(kind)),
+        );
+    }
+
+    pub inline fn write(st: Fault, comptime kind: Kind) void { // Writes the status register selected by `kind`.
+        return asm volatile ("mcr p15, 0, %[st], c5, c0, %[kind]"
+            :
+            : [st] "r" (st),
+              [kind] "i" (@intFromEnum(kind)),
+        );
+    }
+
+    pub const Address = packed struct(u32) { // Fault address word, moved to/from CP15 c6.
+        pub const Kind = enum(u1) { default, watchpoint };
+
+        virtual: u32,
+
+        pub inline fn read(comptime kind: Address.Kind) u32 { // Yields the raw 32-bit register value.
+            return asm volatile ("mrc p15, 0, %[addr], c6, c0, %[kind]"
+                : [addr] "=r" (-> u32), // Typed as `u32` to match the declared return type.
+                : [kind] "i" (@intFromEnum(kind)),
+            );
+        }
+
+        pub inline fn write(addr: Address, comptime kind: Address.Kind) void {
+            return asm volatile ("mcr p15, 0, %[addr], c6, c0, %[kind]"
+                :
+                : [addr] "r" (addr),
+                  [kind] "i" (@intFromEnum(kind)),
+            );
+        }
+    };
+};
+
+pub fn Monitor(comptime T: type) type {
+    return extern struct {
+        raw: T,
+
+        pub fn init(value: T) MonitorSelf {
+            return .{ .raw = value };
+        }
+
+        /// Performs a load, putting the monitor into a exclusive access state.
+        pub fn load(mon: *const MonitorSelf) T {
+            return switch (@bitSizeOf(T)) {
+                8 => @bitCast(asm volatile ("ldrexb %[to], %[ptr]"
+                    : [to] "=r" (-> u8),
+                    : [ptr] "p" (&mon.raw),
+                )),
+                16 => @bitCast(asm volatile ("ldrexh %[to], %[ptr]"
+                    : [to] "=r" (-> u16),
+                    : [ptr] "p" (&mon.raw),
+                )),
+                32 => @bitCast(asm volatile ("ldrex %[to], %[ptr]"
+                    : [to] "=r" (-> u32),
+                    : [ptr] "p" (&mon.raw),
+                )),
+                64 => asm volatile ("ldrexd %[to:Q], %[to:R], %[ptr]"
+                    : [to] "=r" (-> u64),
+                    : [ptr] "p" (&mon.raw),
+                ),
+                else => @compileError("Unsupported Monitor(" ++ @typeName(T) ++ ")"),
+            };
+        }
+
+        /// Tries to perform a store. If the monitor is still in exclusive access after a
+        /// `load`, the store succeeds and returns `false` putting the monitor into an
+        /// open state again.
+        ///
+        /// Spurious changes to an open state may happen.
+        pub fn store(mon: *MonitorSelf, value: T) bool {
+            return switch (@bitSizeOf(T)) {
+                8 => asm volatile ("strexb %[fail], %[value], %[ptr]"
+                    : [fail] "=&r" (-> bool),
+                    : [ptr] "p" (&mon.raw),
+                      [value] "r" (value),
+                    : .{ .memory = true }),
+                16 => asm volatile ("strexh %[fail], %[value], %[ptr]"
+                    : [fail] "=&r" (-> bool),
+                    : [ptr] "p" (&mon.raw),
+                      [value] "r" (value),
+                    : .{ .memory = true }),
+                32 => asm volatile ("strex %[fail], %[value], %[ptr]"
+                    : [fail] "=&r" (-> bool),
+                    : [ptr] "p" (&mon.raw),
+                      [value] "r" (value),
+                    : .{ .memory = true }),
+                64 => asm volatile ("strexd %[fail], %[value:Q], %[value:R], %[ptr]"
+                    : [fail] "=&r" (-> bool),
+                    : [ptr] "p" (&mon.raw),
+                      [value] "r" (value),
+                    : .{ .memory = true }),
+                else => @compileError("Unsupported Monitor(" ++ @typeName(T) ++ ")"),
+            };
+        }
+
+        /// Puts the monitor into an open state.
+ pub fn clear(_: *MonitorSelf) void { + asm volatile ("clrex"); + } + + const MonitorSelf = @This(); + }; +} + +const std = @import("std"); +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/cpu/arm9.zig b/src/platform/3ds/mango/hardware/cpu/arm9.zig new file mode 100644 index 0000000..7d93d74 --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu/arm9.zig @@ -0,0 +1,83 @@ +pub const Control = packed struct(u32) { + mmu: bool = false, + _reserved0: u1 = 0, + data_cache: bool = false, + _reserved1: u4 = std.math.maxInt(u4), + big_endian: bool = false, + _reserved2: u4 = 0, + instruction_cache: bool = false, + alternate_exception_vectors: bool = false, + cache_round_robin_replacement: bool = false, + disable_thumb_by_pc_loads: bool = false, + data_tcm: bool = false, + data_tcm_load_mode: bool = false, + instruction_tcm: bool = false, + instruction_tcm_load_mode: bool = false, + _reserved3: u12 = 0, + + pub inline fn read() Control { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 0" + : [cnt] "=r" (-> Control), + ); + } + + pub inline fn write(cnt: Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 0" + : + : [cnt] "r" (cnt), + ); + } +}; + +pub const Interrupt = packed struct(u32) { + pub const Registers = extern struct { + enable: Interrupt, + flags: Interrupt, + }; + + pub const Pxi = packed struct(u3) { + sync: bool, + send_emoty: bool, + receive_full: bool, + }; + + pub const Sdio = packed struct(u2) { + controller: bool, + async: bool, + }; + + pub const Debug = packed struct(u2) { + receive: bool, + send: bool, + }; + + pub const Gamecard = packed struct(u2) { + power_off: bool, + insert: bool, + }; + + pub const Xdma = packed struct(u2) { + event: bool, + fault: bool, + }; + + ndma: BitpackedArray(bool, 8), + timer: BitpackedArray(bool, 4), + pxi: Pxi, + aes: bool, + sdio: BitpackedArray(Sdio, 2), + debug: Debug, + rsa: bool, 
+    ctr_card: BitpackedArray(bool, 2),
+    gamecard: Gamecard,
+    ntr_card: bool,
+    xdma: Xdma,
+    _unused0: u2 = 0,
+};
+
+// CP15 c0 c0 2 -> TCM size
+
+const std = @import("std");
+const zitrus = @import("zitrus");
+const hardware = zitrus.hardware;
+const BitpackedArray = hardware.BitpackedArray;
diff --git a/src/platform/3ds/mango/hardware/csnd.zig b/src/platform/3ds/mango/hardware/csnd.zig
new file mode 100644
index 0000000..fa59fe9
--- /dev/null
+++ b/src/platform/3ds/mango/hardware/csnd.zig
@@ -0,0 +1,122 @@
+//! Definitions for MMIO `CSND` registers.
+//!
+//! Based on the documentation found in GBATEK and 3dbrew:
+//! - https://problemkaputt.de/gbatek.htm#3dssoundandmicrophone
+//! - https://www.3dbrew.org/wiki/CSND_Registers
+
+/// Volume value; `volume` clamps its argument to `0x8000`.
+pub const Volume = enum(u16) {
+    pub const min: Volume = .volume(0);
+    /// Largest value `volume` can return.
+    pub const max: Volume = .volume(0x8000);
+
+    _,
+
+    pub fn volume(value: u16) Volume {
+        return @enumFromInt(@min(value, 0x8000));
+    }
+};
+
+/// Sample rate register value; `rate` asserts that its argument does not exceed `0xFFBE`.
+pub const SampleRate = enum(u16) {
+    pub const min: SampleRate = .rate(0);
+    pub const max: SampleRate = .rate(0xFFBE);
+
+    _,
+
+    pub fn rate(value: u16) SampleRate {
+        std.debug.assert(value <= 0xFFBE);
+        return @enumFromInt(value);
+    }
+};
+
+pub const MasterControl = packed struct(u32) {
+    volume: Volume,
+    mute: bool,
+    _unused0: u13,
+    dissonant_disable: bool,
+    /// When this is not true, some registers won't be written.
+    read_write: bool,
+};
+
+pub const Channel = extern struct {
+    pub const WaveDuty = enum(u3) { _ };
+    pub const Format = enum(u2) { pcm8, pcm16, ima_adpcm, psg };
+    pub const Repeat = enum(u2) { manual, loop, one_shot, loop_constant };
+
+    /// Per-channel control word.
+    pub const Control = packed struct(u32) {
+        wave_duty: WaveDuty,
+        /// Padding; three bits are needed here so that all fields together fill the
+        /// 32-bit backing integer (3 + 3 + 1 + 1 + 2 + 2 + 2 + 1 + 1 + 16).
+        _unused0: u3 = 0,
+        interpolate_linearly: bool,
+        hold_last: bool,
+        _unused1: u2 = 0,
+        repeat: Repeat,
+        format: Format,
+        pause_disable: bool,
+        busy: bool,
+        sample_rate: SampleRate,
+    };
+
+    pub const ImaAdPcm = packed struct(u32) {
+        value: i16,
+        index_value: u7,
+        _unused0: u8,
+        reload_second_buffer_state: bool,
+    };
+
+    pub const Volume = packed struct(u32) {
+        right: csnd.Volume,
+        left: csnd.Volume,
+    };
+
+    control: Control,
+    output_volume: Channel.Volume,
+    capture_volume: Channel.Volume,
+    start_address: PhysicalAddress,
+    total_size: hardware.LsbRegister(u27),
+    // So you can start with some sound and then loop with another? If true cool.
+    // XXX: 3dbrew says this is the other channel? When this is 0x0 then mono audio is played. Name is not accurate
+    loop_restart_address: PhysicalAddress,
+    start_ima_state: ImaAdPcm,
+    restart_ima_state: ImaAdPcm,
+};
+
+pub const Capture = extern struct {
+    pub const Format = enum(u1) { pcm16, pcm8 };
+
+    pub const Control = packed struct(u32) {
+        one_shot: bool,
+        format: Format,
+        _unknown0: u1,
+        _unused0: u12 = 0,
+        busy: bool,
+        _unused1: u16 = 0,
+    };
+
+    control: Control,
+    sample_rate: LsbRegister(SampleRate),
+    length: LsbRegister(u24),
+    address: PhysicalAddress,
+};
+
+pub const Registers = extern struct {
+    master: MasterControl,
+    _unused0: [0x3FC]u8,
+    channels: [32]Channel,
+    captures: [2]Capture,
+};
+
+const csnd = @This();
+
+const std = @import("std");
+
+const zitrus = @import("zitrus");
+const hardware = zitrus.hardware;
+
+const LsbRegister = hardware.LsbRegister;
+const MsbRegister = hardware.MsbRegister;
+
+const PhysicalAddress = hardware.PhysicalAddress;
diff --git a/src/platform/3ds/mango/hardware/dsp.zig b/src/platform/3ds/mango/hardware/dsp.zig
new file mode 100644
index 0000000..15c9578
--- /dev/null
+++ b/src/platform/3ds/mango/hardware/dsp.zig
@@ -0,0 +1,94 @@
+//! Definitions for MMIO `DSP` registers.
+//!
+//! Used for **D**igital **S**ignal **P**rocessing, a.k.a: sound.
+//! It's an independent processor named *TeakLite II* from XpertTeak.
+//!
+//!
+//! Based on the documentation found in GBATEK and 3dbrew:
+//! - https://problemkaputt.de/gbatek.htm#3dssoundandmicrophone
+
+pub const Address = enum(u16) {
+    _,
+
+    pub fn init(address: u16) Address {
+        return @enumFromInt(address);
+    }
+};
+
+pub const Configuration = packed struct(u16) {
+    pub const Region = enum(u2) { data, mmio, code, ahbm };
+    pub const Length = enum(u2) { @"1", @"8", @"16", free };
+
+    reset: bool,
+    auto_increment_transfer_address: bool,
+    read_length: Length,
+    read_start: bool,
+    irq_read_fifo_full: bool,
+    irq_read_fifo_not_empty: bool,
+    irq_write_fifo_full: bool,
+    irq_write_fifo_empty: bool,
+    irq_reply_register: BitpackedArray(bool, 3),
+    transfer_region: Region,
+    // NOTE(review): GBATEK appears to describe the transfer region as a 4-bit field
+    // (bits 12-15); confirm the `Region` encodings for `code`/`ahbm` against it.
+    /// Padding so that the fields add up to the 16-bit backing integer.
+    _unused0: u2 = 0,
+};
+
+pub const Status = packed struct(u16) {
+    reading: bool,
+    writing: bool,
+    resetting: bool,
+    _unused0: u2,
+    read_fifo_full: bool,
+    read_fifo_not_empty: bool,
+    write_fifo_full: bool,
+    write_fifo_empty: bool,
+    semaphore_irq: bool,
+    reply_register_unwritten: BitpackedArray(bool, 3),
+    command_register_unread: BitpackedArray(bool, 3),
+};
+
+pub const Semaphore = extern struct {
+    send: BitpackedArray(bool, 16),
+    _unused0: [2]u8,
+    irq_disable: BitpackedArray(bool, 16),
+    _unused1: [2]u8,
+    send_clear: BitpackedArray(bool, 16),
+    _unused2: [2]u8,
+    receive: BitpackedArray(bool, 16),
+    _unused3: [2]u8,
+};
+
+pub const Registers = extern struct {
+    fifo: u16,
+    _unused0: [2]u8,
+    transfer_address: Address,
+    _unused1: [2]u8,
+    config: Configuration,
+    _unused2: [2]u8,
+    status: Status,
+    _unused3: [2]u8,
+    semaphore: Semaphore,
+    command0: u16,
+    _unused4: [2]u8,
+    reply0: u16,
+    _unused5: [2]u8,
+    command1: u16,
+    _unused6: [2]u8,
+    reply1: u16,
+    _unused7: [2]u8,
+    command2: u16,
+    _unused8: [2]u8,
+    reply2: u16,
+    _unused9: [2]u8,
+};
+
+const dsp = @This();
+
+const std = @import("std");
+
+const zitrus = @import("zitrus");
+const hardware = zitrus.hardware;
+
+const BitpackedArray = hardware.BitpackedArray;
diff --git a/src/platform/3ds/mango/hardware/hid.zig b/src/platform/3ds/mango/hardware/hid.zig
new file mode
100644
index 0000000..45adc62
--- /dev/null
+++ b/src/platform/3ds/mango/hardware/hid.zig
@@ -0,0 +1,60 @@
+//! Definitions for MMIO `HID` registers.
+//!
+//! Used only for main PAD buttons (Circle Pad + New3DS buttons not included)
+//!
+//! Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsmiscregisters
+
+/// One bit per main pad button, padded to fill the 16-bit register.
+pub const State = packed struct(u16) {
+    a: bool,
+    b: bool,
+    select: bool,
+    start: bool,
+    right: bool,
+    left: bool,
+    up: bool,
+    down: bool,
+    r: bool,
+    l: bool,
+    x: bool,
+    y: bool,
+    /// Padding: the 12 button bits plus these 4 bits make up the 16-bit backing integer.
+    _unused0: u4 = 0,
+};
+
+/// Pad interrupt configuration: participating buttons, enable bit and `or`/`and` condition.
+pub const Interrupt = packed struct(u16) {
+    pub const Condition = enum(u1) { @"or", @"and" };
+
+    /// Button selection for the interrupt, one bit per main pad button.
+    pub const Source = packed struct(u12) {
+        a: bool,
+        b: bool,
+        select: bool,
+        start: bool,
+        right: bool,
+        left: bool,
+        up: bool,
+        down: bool,
+        r: bool,
+        l: bool,
+        x: bool,
+        y: bool,
+    };
+
+    source: Source,
+    _unused0: u2 = 0,
+    enable: bool,
+    condition: Condition,
+};
+
+/// Pad register block: button state followed by the interrupt configuration.
+/// NOTE(review): the `released` name suggests a set bit means "not pressed" — confirm against GBATEK.
+pub const Registers = extern struct {
+    released: State,
+    irq: Interrupt,
+};
+
+const hid = @This();
+
+const zitrus = @import("zitrus");
diff --git a/src/platform/3ds/mango/hardware/i2c.zig b/src/platform/3ds/mango/hardware/i2c.zig
new file mode 100644
index 0000000..0a4e128
--- /dev/null
+++ b/src/platform/3ds/mango/hardware/i2c.zig
@@ -0,0 +1,45 @@
+//! Definitions for MMIO `I2C` registers.
+//!
+//! Used for numerous things.
+//!
+//!
Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsi2cregisters + +pub const Direction = enum(u1) { write, read }; + +pub const Control = packed struct(u8) { + stop: bool, + start: bool, + pause: bool, + _unknown0: u1 = 0, + ack: bool, + direction: Direction, + irq_enable: bool, + busy: bool, +}; + +pub const ControlExtended = packed struct(u16) { + clock: bool, + wait_if_clock_low: bool, + _unused0: u13 = 0, + _unknown1: u1 = 0, +}; + +pub const Speed = enum(u6) { + _, +}; + +pub const Bus = extern struct { + pub const Clock = packed struct(u16) { + low: Speed, + _unused0: u2 = 0, + high: Speed, + _unused1: u2 = 0, + }; + + data: u8, + control: Control, + control_extended: ControlExtended, + clock: Clock, +}; + +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/lcd.zig b/src/platform/3ds/mango/hardware/lcd.zig new file mode 100644 index 0000000..2629c48 --- /dev/null +++ b/src/platform/3ds/mango/hardware/lcd.zig @@ -0,0 +1,92 @@ +//! Definitions for MMIO `LCD` registers. +//! +//! 
Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsvideolcdregisters + +pub const Parallax = extern struct { + pub const Control = packed struct(u32) { + pub const Enable = enum(u2) { off, enable, _ }; + + tp27_enable: Enable, + tp27_invert_output: bool, + _unused0: u13 = 0, + tp29_enable: Enable, + tp29_invert_output: bool, + _unused1: u13 = 0, + }; + + pub const Duty = packed struct(u32) { + /// (off + 1) * 0.9us + off: u16, + /// (on + 1) * 0.9us + on: u16, + }; + + control: Control, + /// Controls the TP27 parallax PWM + duty: Duty, +}; + +pub const Screen = extern struct { + pub const Flags = packed struct(u32) { + abl_enable: bool, + _unused0: u7 = 0, + dither_related: BitpackedArray(bool, 2), + _unused1: u22 = 0, + }; + + pub const Fill = packed struct(u32) { + r: u8, + g: u8, + b: u8, + enable: bool, + _unused0: u7 = 0, + }; + + pub const AdaptiveBacklight = extern struct { + // TODO: lazy + _todo: [0x5F8]u8, + }; + + flags: Flags, + fill: Fill, + adaptive_backlight: AdaptiveBacklight, + + comptime { + std.debug.assert(@sizeOf(Screen) == 0x600); + } +}; + +pub const Clock = packed struct(u32) { + top_disable: bool = false, + _unused0: u15 = 0, + bottom_disable: bool = false, + _unused1: u15 = 0, +}; + +pub const Status = packed struct(u32) { + _: u32 = 0, +}; + +pub const Reset = enum(u1) { reset, enable }; + +pub const Registers = extern struct { + parallax: Parallax, + status: Status, + clock: Clock, + _unknown0: u32, + reset: LsbRegister(Reset), + _unused0: [122]u32, + top: Screen, + _unused1: [128]u32, + bottom: Screen, +}; + +const lcd = @This(); + +const std = @import("std"); + +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const LsbRegister = hardware.LsbRegister; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/lgy.zig b/src/platform/3ds/mango/hardware/lgy.zig new file mode 100644 index 0000000..f3def32 --- /dev/null +++ 
b/src/platform/3ds/mango/hardware/lgy.zig @@ -0,0 +1,138 @@ +//! Definitions for MMIO `LGY` registers. +//! +//! Used for **L**e**g**ac**y** framebuffer conversion, NDS/GBA -> 3DS. +//! +//! Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsvideolgyregisterslegacygbandsvideotoframebuffer + +pub const Framebuffer = extern struct { + pub const Format = enum(u2) { abgr8888, bgr888, rgb5551, rgb565 }; + pub const Rotate = enum(u2) { none, @"90", @"180", @"270" }; + + pub const Control = packed struct(u32) { + start: bool, + enable_vertical_scaling: bool, + enable_horizontal_scaling: bool, + _unused0: u1 = 0, + brightness_dither_enable: bool, + _brigness_dither_enable_too: bool, + _unused1: u2 = 0, + format: Format, + /// Clockwise rotation + rotate: Rotate, + swizzle: bool, + _unused2: u2 = 0, + dma: bool, + _unused3: u16 = 0, + }; + + pub const Dimensions = packed struct(u32) { + width_minus_one: u9, + _unused0: u7 = 0, + height_minus_one: u9, + _unused1: u7 = 0, + + pub fn init(width: u9, height: u9) Dimensions { // width and height must be >= 1: each field stores the value minus one + return .{ .width_minus_one = width - 1, .height_minus_one = height - 1 }; + } + }; + + pub const InterruptStatus = packed struct(u32) { + first_block: bool, + next_block: bool, + last_line: bool, + _unused0: u13 = 0, + current_block_line: u8, + _unused1: u8 = 0, + }; + + pub const InterruptEnable = packed struct(u32) { + first_block: bool, + next_block: bool, + last_line: bool, + _unused0: u29 = 0, + }; + + pub const Scaling = extern struct { + /// Scale according to `length` output pixels. 
+ /// + /// `bits` tell which input pixels get used, effectively making it `length` / `bits` + pub const Pattern = extern struct { + pub const @"1x": Pattern = .init(1, .splat(1)); + pub const @"1.16x": Pattern = .init(7, .init(.{ 1, 1, 0, 1, 1, 0, 1, 0 })); + pub const @"1.2x": Pattern = .init(6, .init(.{ 1, 1, 1, 0, 1, 1, 0, 0 })); + pub const @"1.25x": Pattern = .init(5, .init(.{ 1, 1, 0, 1, 1, 0, 0, 0 })); + pub const @"1.33x": Pattern = .init(4, .init(.{ 1, 1, 1, 0, 0, 0, 0, 0 })); + pub const @"1.4x": Pattern = .init(7, .init(.{ 1, 1, 1, 1, 1, 0, 0, 0 })); + pub const @"1.5x": Pattern = .init(3, .init(.{ 1, 1, 0, 0, 0, 0, 0, 0 })); + pub const @"1.66x": Pattern = .init(5, .init(.{ 1, 1, 1, 0, 1, 0, 0, 0 })); + pub const @"1.75x": Pattern = .init(7, .init(.{ 1, 0, 1, 0, 1, 0, 1, 0 })); + pub const @"2x": Pattern = .init(2, .init(.{ 1, 0, 0, 0, 0, 0, 0, 0 })); + pub const @"2.33x": Pattern = .init(7, .init(.{ 1, 0, 0, 1, 0, 0, 1, 0 })); + pub const @"2.5x": Pattern = .init(5, .init(.{ 1, 0, 1, 0, 0, 0, 0, 0 })); + pub const @"2.66x": Pattern = .init(8, .init(.{ 1, 0, 0, 1, 0, 0, 0, 1 })); + pub const @"3x": Pattern = .init(3, .init(.{ 1, 0, 0, 0, 0, 0, 0, 0 })); + pub const @"3.5x": Pattern = .init(7, .init(.{ 1, 0, 0, 1, 0, 0, 0, 0 })); + + length: LsbRegister(u3), + bits: LsbRegister(BitpackedArray(u1, 8)), + + pub fn init(length: u3, bits: BitpackedArray(u1, 8)) Pattern { + return .{ .length = length, .bits = bits }; + } + }; + + pub const Brightness = enum(u16) { _ }; + + pattern: Pattern, + _unused0: [0x38]u8, + brightness: [6][8]LsbRegister(Brightness), + + comptime { + std.debug.assert(@sizeOf(Scaling) == 0x100); + } + }; + + control: Control, + size: Dimensions, + irq_status: InterruptStatus, + irq_enable: InterruptEnable, + _unused0: [0x10]u8, + alpha: LsbRegister(u8), + _unused1: [0xCC]u8, + prefetch: LsbRegister(u4), + _unused2: [0x0C]u8, + dither: [4]u64, + _unused3: [0xE0]u8, + vertical_scaling: Scaling, + horizontal_scaling: Scaling, + + comptime 
{ + std.debug.assert(@sizeOf(Framebuffer) == 0x400); + } +}; + +pub const Config = extern struct { + bottom: Framebuffer, + _unused0: [0xC00]u8, + top: Framebuffer, +}; + +pub const Fifo = extern struct { + bottom: [0x1000]u8, + top: [0x1000]u8, +}; + +comptime { + _ = Config; + _ = Fifo; +} + +const lgy = @This(); + +const std = @import("std"); + +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const LsbRegister = hardware.LsbRegister; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/pica.zig b/src/platform/3ds/mango/hardware/pica.zig new file mode 100644 index 0000000..9fa5859 --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica.zig @@ -0,0 +1,2940 @@ +//! Definitions for MMIO `PICA200` registers. +//! +//! - LCD screens are physically rotated 90º CCW from how the devices are held (i.e: bottom is not 320x240, is 240x320) +//! - NDC clipping volume: +//! - X: [-W, W] +//! - Y: [-W, W] +//! - Z: [0, -W] +//! - Framebuffer origin can be changed so `-1` in NDC could mean bottom-left (GL) or top-left (D3D, Metal, VK) +//! - Texture Origin is Bottom-Left, they are sampled from bottom (0, 0) to top (1, 1). +//! +//! WARNING: zitrus has opinionated register naming that suit better their usage! +//! +//! Based on the documentation found in GBATEK and 3dbrew: +//! - https://problemkaputt.de/gbatek.htm#3dsgpuinternalregisteroverview +//! - https://www.3dbrew.org/wiki/GPU/External_Registers +//! 
- https://www.3dbrew.org/wiki/GPU/Internal_Registers + +pub const command = @import("pica/command.zig"); +pub const shader = @import("pica/shader.zig"); + +pub const UQ0_11 = zsflt.Fixed(.unsigned, 0, 11); +pub const UQ0_12 = zsflt.Fixed(.unsigned, 0, 12); +pub const UQ0_23 = zsflt.Fixed(.unsigned, 0, 23); +pub const Q4_8 = zsflt.Fixed(.signed, 4, 8); +pub const Q0_11 = zsflt.Fixed(.signed, 0, 11); +pub const Q1_11 = zsflt.Fixed(.signed, 1, 11); + +pub const F5_10 = zsflt.Float(5, 10); +pub const F3_12 = zsflt.Float(3, 12); +pub const F7_12 = zsflt.Float(7, 12); +pub const F7_16 = zsflt.Float(7, 16); +pub const F7_23 = zsflt.Float(7, 23); +pub const F8_23 = zsflt.Float(8, 23); + +pub const Q1_11x2 = packed struct(u32) { + x: Q1_11, + _unused0: u3 = 0, + y: Q1_11, + _unused1: u3 = 0, + + pub fn init(x: Q1_11, y: Q1_11) Q1_11x2 { + return .{ .x = x, .y = y }; + } +}; + +pub const F5_10x2 = packed struct(u32) { + x: F5_10, + y: F5_10, + + pub fn init(x: F5_10, y: F5_10) F5_10x2 { + return .{ .x = x, .y = y }; + } +}; + +pub const F7_16x4 = extern struct { + pub const Unpacked = struct { x: F7_16, y: F7_16, z: F7_16, w: F7_16 }; + + data: [@divExact(@bitSizeOf(F7_16) * 4, @bitSizeOf(u32))]u32, + + pub fn pack(x: F7_16, y: F7_16, z: F7_16, w: F7_16) F7_16x4 { + var vec: F7_16x4 = undefined; + const vec_bytes = std.mem.asBytes(&vec.data); + + std.mem.writePackedInt(u24, vec_bytes, 0, @bitCast(x), .little); + std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16), @bitCast(y), .little); + std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16) * 2, @bitCast(z), .little); + std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16) * 3, @bitCast(w), .little); + std.mem.swap(u32, &vec.data[0], &vec.data[2]); + + return vec; + } + + pub fn unpack(value: F7_16x4) [4]F7_16 { + var unpacked: [3]u32 = value.data; + std.mem.swap(u32, &unpacked[0], &unpacked[2]); + + return .{ + @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), 0, .little)), + 
@bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16), .little)), + @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16) * 2, .little)), + @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16) * 3, .little)), + }; + } +}; + +pub const morton = struct { + /// Returns the morton/z-order coordinates for `value` + pub fn toDimensions(comptime T: type, comptime dimensions: usize, value: T) [dimensions]std.meta.Int(.unsigned, @divExact(@bitSizeOf(T), dimensions)) { + std.debug.assert(@typeInfo(T) == .int); + const DecomposedInt = std.meta.Int(.unsigned, @divExact(@bitSizeOf(T), dimensions)); + + // Basically bits are interleaved + // 2-dimensional 8-bits example: yxyxyxyx + var values: [dimensions]DecomposedInt = @splat(0); + var current_value = value; + inline for (0..@bitSizeOf(T)) |i| { + const shift = i / dimensions; + const set = &values[i % dimensions]; + + set.* |= @intCast((current_value & 0b1) << shift); + current_value >>= 1; + } + + return values; + } + + test toDimensions { + try testing.expectEqual([2]u3{ 0b001, 0b001 }, toDimensions(u6, 2, 0b000011)); + try testing.expectEqual([2]u3{ 0b010, 0b010 }, toDimensions(u6, 2, 0b001100)); + try testing.expectEqual([2]u3{ 0b011, 0b011 }, toDimensions(u6, 2, 0b001111)); + try testing.expectEqual([2]u3{ 0b111, 0b111 }, toDimensions(u6, 2, 0b111111)); + } + + /// Returns the morton linear index for the coordinates. 
+ pub fn toIndex(comptime T: type, comptime dimensions: usize, value: [dimensions]T) std.meta.Int(.unsigned, dimensions * @bitSizeOf(T)) { + std.debug.assert(@typeInfo(T) == .int); + + const IndexType = std.meta.Int(.unsigned, dimensions * @bitSizeOf(T)); + const max_index = dimensions * @bitSizeOf(T); + var index: IndexType = 0; + + inline for (0..max_index) |i| { + const dimension = i % dimensions; + const dimension_bit = i / dimensions; + + index |= (@as(IndexType, value[dimension]) << (i - dimension_bit)) & (@as(IndexType, 1) << i); + } + + return index; + } + + test toIndex { + try testing.expectEqual(0b000011, toIndex(u3, 2, .{ 1, 1 })); + try testing.expectEqual(0b001100, toIndex(u3, 2, .{ 2, 2 })); + try testing.expectEqual(0b001111, toIndex(u3, 2, .{ 3, 3 })); + try testing.expectEqual(0b110000, toIndex(u3, 2, .{ 4, 4 })); + try testing.expectEqual(0b111111, toIndex(u3, 2, .{ 7, 7 })); + + try testing.expectEqual(0b101011, toIndex(u3, 2, .{ 1, 7 })); + try testing.expectEqual(0b011101, toIndex(u3, 2, .{ 7, 2 })); + } + + // TODO: We could test fuzzing (zig 0.16.0) value -> toDimensions -> toIndex -> value, as it must always be idempotent + + pub const Strategy = enum { + /// Linear -> Morton + tile, + /// Morton -> Linear + untile, + }; + + pub const ConversionOptions = struct { + input_x: usize, + input_y: usize, + input_stride: usize, + + output_x: usize, + output_y: usize, + output_stride: usize, + + width: usize, + height: usize, + + pixel_size: usize, + + pub fn full(width: usize, height: usize, pixel_size: usize) ConversionOptions { + return .{ + .input_x = 0, + .input_y = 0, + .input_stride = width * pixel_size, + + .output_x = 0, + .output_y = 0, + .output_stride = width * pixel_size, + + .width = width, + .height = height, + + .pixel_size = pixel_size, + }; + } + }; + + /// Asserts that the Morton-tiled Image is divisible by the `tile_size` + pub fn convert2(comptime strategy: Strategy, comptime tile_size: usize, dst_pixels: []u8, src_pixels: 
[]const u8, opts: ConversionOptions) void { + comptime std.debug.assert(std.math.isPowerOfTwo(tile_size)); // We depend on this and the PICA only supports 2x2 (ETC), 8x8 and 32x32 tile sizes. + + const tile_pixels = (tile_size * tile_size); + const subtile_mask = (tile_size - 1); + const tile_shift = comptime std.math.log2(tile_size); + + const output_real_width = opts.output_stride / opts.pixel_size; + + for (0..opts.height) |current_y| { + const input_y = current_y + opts.input_y; + const output_y = current_y + opts.output_y; + + for (0..opts.width) |current_x| { + const input_x = current_x + opts.input_x; + const output_x = current_x + opts.output_x; + + const src_pixel, const dst_pixel = pxl: switch (strategy) { + .tile => { + const src_index = input_y * opts.input_stride + input_x * opts.pixel_size; + + std.debug.assert((output_real_width & subtile_mask) == 0); + const dst_tile_pixels_per_line = (output_real_width >> tile_shift) * tile_pixels; + + const dst_tile_y = output_y >> tile_shift; + const dst_tile_x = output_x >> tile_shift; + + const dst_subtile_y: u3 = @intCast(output_y & subtile_mask); + const dst_subtile_x: u3 = @intCast(output_x & subtile_mask); + const dst_subtile_morton = toIndex(u3, 2, .{ dst_subtile_x, dst_subtile_y }); + + const dst_pixel_start = (dst_tile_y * dst_tile_pixels_per_line) + (dst_tile_x * tile_pixels); + const dst_index = (dst_pixel_start + dst_subtile_morton) * opts.pixel_size; + + break :pxl .{ src_pixels[src_index..][0..opts.pixel_size], dst_pixels[dst_index..][0..opts.pixel_size] }; + }, + .untile => { + comptime unreachable; // TODO + }, + }; + + @memcpy(dst_pixel, src_pixel); + } + } + } + + pub fn convert(comptime strategy: Strategy, comptime tile_size: usize, width: usize, pixel_size: usize, dst_pixels: []u8, src_pixels: []const u8) void { + std.debug.assert(dst_pixels.len == src_pixels.len); + const max_tile_subindex = (tile_size * tile_size); + const SubindexInt = std.math.IntFittingRange(0, max_tile_subindex - 1); + + 
const height = @divExact(@divExact(src_pixels.len, pixel_size), width); + const width_tiles = @divExact(width, tile_size); + const height_tiles = @divExact(height, tile_size); + const stride = width * pixel_size; + + var i: usize = 0; + for (0..height_tiles) |y_tile| { + const y_start = y_tile * tile_size; + + for (0..width_tiles) |x_tile| { + const x_start = x_tile * tile_size; + + for (0..max_tile_subindex) |tile| { + const x, const y = toDimensions(SubindexInt, 2, @intCast(tile)); + + const linear_index = i; + const morton_index = (y_start + y) * stride + (x_start + x) * pixel_size; + + const src_pixel, const dst_pixel = switch (strategy) { + .tile => .{ src_pixels[morton_index..][0..pixel_size], dst_pixels[linear_index..][0..pixel_size] }, + .untile => .{ src_pixels[linear_index..][0..pixel_size], dst_pixels[morton_index..][0..pixel_size] }, + }; + + @memcpy(dst_pixel, src_pixel); + i += pixel_size; + } + } + } + } + + pub fn convertNibbles(comptime strategy: Strategy, comptime tile_size: usize, width: usize, dst_pixels: []u8, src_pixels: []const u8) void { + std.debug.assert(dst_pixels.len == src_pixels.len); + const max_tile_subindex = (tile_size * tile_size); + const SubindexInt = std.math.IntFittingRange(0, max_tile_subindex - 1); + + const height = @divExact(src_pixels.len << 1, width); + const width_tiles = @divExact(width, tile_size); + const height_tiles = @divExact(height, tile_size); + const stride = @divExact(width, 2); + + var i: usize = 0; + for (0..height_tiles) |y_tile| { + const y_start = y_tile * tile_size; + + for (0..width_tiles) |x_tile| { + const x_start = x_tile * tile_size; + + for (0..max_tile_subindex) |tile| { + const x, const y = toDimensions(SubindexInt, 2, @intCast(tile)); + + const linear_index = i >> 1; + const second_linear_nibble = (i & 1) != 0; + + const morton_x = (x_start + x); + const morton_index = (y_start + y) * stride + (morton_x >> 1); + const second_morton_nibble = (morton_x & 1) != 0; + + const src_pixel, const 
dst_pixel, const second_src_nibble, const second_dst_nibble = switch (strategy) { + .tile => .{ &src_pixels[morton_index], &dst_pixels[linear_index], second_morton_nibble, second_linear_nibble }, + .untile => .{ &src_pixels[linear_index], &dst_pixels[morton_index], second_linear_nibble, second_morton_nibble }, + }; + + const src_nibble = if (second_src_nibble) (src_pixel.* >> 4) else (src_pixel.* & 0xF); + const last_dst_pixel = dst_pixel.*; + + dst_pixel.* = if (second_dst_nibble) (last_dst_pixel & 0xF) | (src_nibble << 4) else (last_dst_pixel & 0xF0) | src_nibble; + i += 1; + } + } + } + } +}; + +pub const Screen = enum(u1) { + pub const width_po2 = 256; + + top, + bottom, + + pub fn other(screen: Screen) Screen { + return switch (screen) { + .top => .bottom, + .bottom => .top, + }; + } + + pub inline fn width(_: Screen) usize { + return 240; + } + + pub inline fn height(screen: Screen) usize { + return switch (screen) { + .top => 400, + .bottom => 320, + }; + } +}; + +/// Deprecated: DisplayController.Framebuffer.Pixel.Size +pub const PixelSize = DisplayController.Framebuffer.Pixel.Size; + +/// Deprecated: DisplayController.Framebuffer.Pixel +pub const ColorFormat = DisplayController.Framebuffer.Pixel; + +/// Depth values are stored as normalized integers. +pub const DepthStencilFormat = enum(u2) { + /// 2 bytes for depth, `0xDDDD`. + d16, + /// 3 bytes for depthm `0xDDDDDD`. + d24 = 2, + /// 3 bytes for depth and 1 byte for stencil `0xSSDDDDDD`. 
+ d24_s8, + + pub fn bytesPerPixel(format: DepthStencilFormat) usize { + return switch (format) { + .d16 => @sizeOf(u16), + .d24 => 3, + .d24_s8 => @sizeOf(u32), + }; + } +}; + +/// Deprecated: use DisplayController.Framebuffer.Interlacing +pub const FramebufferInterlacingMode = DisplayController.Framebuffer.Interlacing; + +/// Deprecated: use DisplayController.Framebuffer.Dma +pub const DmaSize = DisplayController.Framebuffer.Dma; + +pub const TextureUnit = enum(u2) { + pub const main: TextureUnit = .@"0"; + pub const procedural: TextureUnit = .@"3"; + + @"0", + @"1", + @"2", + @"3", +}; + +/// The front face is always counter-clockwise and cannot be changed. +pub const CullMode = enum(u2) { + /// No triangles are discarded. + none, + /// Triangles with a counter-clockwise winding order are culled. + ccw, + /// Triangles with a clockwise winding order are culled. + cw, +}; + +pub const ScissorMode = enum(u2) { + /// No pixels will be discarded. + disable, + /// The pixels outside the scissor area will be rendered. + outside, + /// The pixels inside the scissor area will be rendered. 
+ inside = 3, +}; + +pub const EarlyDepthCompareOperation = enum(u2) { + ge, + gt, + le, + lt, +}; + +pub const OutputMap = packed struct(u32) { + pub const Semantic = enum(u5) { + position_x, + position_y, + position_z, + position_w, + + normal_quaternion_x, + normal_quaternion_y, + normal_quaternion_z, + normal_quaternion_w, + + color_r, + color_g, + color_b, + color_a, + + texture_coordinates_0_u, + texture_coordinates_0_v, + texture_coordinates_1_u, + texture_coordinates_1_v, + texture_coordinates_0_w, + + view_x = 0x12, + view_y, + view_z, + + texture_coordinates_2_u = 0x16, // explicit: 0x15 is skipped, so auto-numbering after view_z would be off by one + texture_coordinates_2_v, + + unused = 0x1F, + _, + + pub fn isNormalQuaternion(semantic: Semantic) bool { + return switch (semantic) { + .normal_quaternion_x, .normal_quaternion_y, .normal_quaternion_z, .normal_quaternion_w => true, + else => false, + }; + } + + pub fn isColor(semantic: Semantic) bool { + return switch (semantic) { + .color_r, .color_g, .color_b, .color_a => true, + else => false, + }; + } + + pub fn isTextureCoordinates0(semantic: Semantic) bool { + return switch (semantic) { + .texture_coordinates_0_u, .texture_coordinates_0_v, .texture_coordinates_0_w => true, + else => false, + }; + } + + pub fn isTextureCoordinates1(semantic: Semantic) bool { + return switch (semantic) { + .texture_coordinates_1_u, .texture_coordinates_1_v => true, + else => false, + }; + } + + pub fn isTextureCoordinates2(semantic: Semantic) bool { + return switch (semantic) { + .texture_coordinates_2_u, .texture_coordinates_2_v => true, + else => false, + }; + } + + pub fn isView(semantic: Semantic) bool { + return switch (semantic) { + .view_x, .view_y, .view_z => true, + else => false, + }; + } + }; + + x: Semantic, + _unusd0: u3 = 0, + y: Semantic, + _unusd1: u3 = 0, + z: Semantic, + _unusd2: u3 = 0, + w: Semantic, + _unusd3: u3 = 0, + + pub fn format(map: OutputMap, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("({}, {}, {}, {})", .{ map.x, map.y, map.z, map.w }); + } +}; + +pub const 
BlendOperation = enum(u3) { + /// `src_factor * src + dst_factor * dst` + add, + /// `src_factor * src - dst_factor * dst` + sub, + /// `dst_factor * dst - src_factor * src` + reverse_sub, + /// `min(src_factor * src, dst_factor * dst)` + min, + /// `max(src_factor * src, dst_factor * dst)` + max, +}; + +pub const BlendFactor = enum(u4) { + zero, + one, + src_color, + one_minus_src_color, + dst_color, + one_minus_dst_color, + src_alpha, + one_minus_src_alpha, + dst_alpha, + one_minus_dst_alpha, + constant_color, + one_minus_constant_color, + constant_alpha, + one_minus_constant_alpha, + src_alpha_saturate, +}; + +pub const LogicOperation = enum(u4) { + clear, + @"and", + reverse_and, + copy, + set, + copy_inverted, + nop, + invert, + nand, + @"or", + nor, + xor, + equivalent, + and_inverted, + or_reverse, + or_inverted, +}; + +pub const CompareOperation = enum(u3) { + never, + always, + eq, + neq, + lt, + le, + gt, + ge, +}; + +pub const StencilOperation = enum(u3) { + /// Keep the current value. + keep, + /// Sets the value to `0`. + zero, + /// Sets the value to `reference`. + replace, + /// Increments the current value and clamps to the maximum representable unsigned value. + increment, + /// Decrements the current value and clamps to `0`. + decrement, + /// Bitwise-inverts the current value. + invert, + /// Increments the current value and wraps to `0` when the maximum value would be exceeded. + increment_wrap, + /// Decrements the current value and wraps to the maximum possible value when the value would go below `0`. + decrement_wrap, +}; + +/// The PICA200 supports only triangle-based primitive topologies. +pub const PrimitiveTopology = enum(u2) { + /// Specifies a series of separate triangle primitives. + /// The number of primitives generated is `(vertexCount / 3)` + triangle_list, + /// Specifies a series of connected triangle primitives with consecutive triangles sharing an edge. 
+ /// The number of primitives generated is `max(0, vertexCount - 2)` + triangle_strip, + /// Specifies a series of connected triangle primitives with all triangles sharing a common vertex. + /// The number of primitives generated is `max(0, vertexCount - 2)` + triangle_fan, + /// Specifies a series of triangle primitives which are to be defined by the geometry shader. + /// The number of primitives generated depends on the shader implementation. + geometry, + + /// Another PICA200 classic. For `drawIndexed` (`drawElements` as GL people call it) you set + /// the primitive topology to `geometry`. + /// + /// Why? Ask the DMP engineers + pub fn indexedTopology(topology: PrimitiveTopology) PrimitiveTopology { + return switch (topology) { + .triangle_list => .geometry, + else => |topo| topo, + }; + } +}; + +pub const IndexFormat = enum(u1) { + /// Specifies that indices are unsigned 8-bit numbers. + u8, + /// Specifies that indices are unsigned 16-bit numbers. + u16, +}; + +pub const TextureUnitFilter = enum(u1) { + nearest, + linear, +}; + +pub const TextureUnitAddressMode = enum(u3) { + clamp_to_edge, + clamp_to_border, + repeat, + mirrored_repeat, +}; + +pub const TextureUnitType = enum(u3) { + @"2d", + cube_map, + shadow_2d, + projection, + shadow_cube, + disabled, +}; + +pub const TextureUnitFormat = enum(u4) { + pub const Hilo88 = extern struct { g: u8, r: u8 }; + pub const I8 = packed struct(u8) { i: u8 }; + pub const A8 = packed struct(u8) { a: u8 }; + pub const Ia88 = packed struct(u16) { i: u8, a: u8 }; + pub const I4 = packed struct(u8) { i: u8 }; + pub const A4 = packed struct(u8) { i: u8 }; + pub const Ia44 = packed struct(u8) { i: u4, a: u4 }; + + abgr8888, + bgr888, + rgba5551, + rgb565, + rgba4444, + ia88, + hilo88, + i8, + a8, + ia44, + i4, + a4, + etc1, + etc1a4, + + pub fn scale(format: TextureUnitFormat, size: usize) usize { + return switch (format) { + .abgr8888 => size << 2, + .bgr888 => size * 3, + .rgba5551, .rgb565, .rgba4444, .ia88, .hilo88 
=> size << 1, + .i8, .a8, .ia44, .etc1a4 => size, + .i4, .a4, .etc1 => size >> 1, + }; + } +}; + +pub const TextureUnitTexture2Coordinates = enum(u1) { + @"2", + @"1", +}; + +pub const TextureUnitTexture3Coordinates = enum(u2) { + @"0", + @"1", + @"2", +}; + +pub const MemoryFill = extern struct { + pub const Control = packed struct(u32) { + pub const none: Control = .{ .busy = false, .width = .@"16" }; + + busy: bool, + finished: bool = false, + _unused0: u6 = 0, + width: PixelSize, + _unused1: u6 = 0, + _unknown0: u5 = 0, + _unused2: u11 = 0, + + pub fn init(width: PixelSize) Control { + return .{ .busy = true, .width = width }; + } + }; + + start: AlignedPhysicalAddress(.@"16", .@"8"), + end: AlignedPhysicalAddress(.@"16", .@"8"), + value: u32, + control: Control, +}; + +pub const PictureFormatter = extern struct { + pub const Dimensions = packed struct(u32) { width: u16, height: u16 }; + + pub const Copy = extern struct { + pub const Line = packed struct(u32) { width: u16, gap: u16 }; + + size: u32, + src: Line, + dst: Line, + }; + + pub const Flags = packed struct(u32) { + pub const Downscale = enum(u2) { none, @"2x1", @"2x2" }; + + flip_v: bool, + linear_tiled: bool, + output_width_less_than_input: bool, + copy: bool, + _unwritable0: u1 = 0, + tiled_tiled: bool, + _unwritable1: u2 = 0, + src_format: ColorFormat, + _unwritable2: u1 = 0, + dst_format: ColorFormat, + _unwritable3: u1 = 0, + use_32x32_tiles: bool, + _unwritable4: u7 = 0, + downscale: Downscale, + _unwritable5: u6 = 0, + }; + + pub const Control = packed struct(u32) { + start: bool, + _unused0: u7 = 0, + finished: bool, + _unused1: u23 = 0, + }; + + src: AlignedPhysicalAddress(.@"16", .@"8"), + dst: AlignedPhysicalAddress(.@"16", .@"8"), + dst_dimensions: Dimensions, + src_dimensions: Dimensions, + flags: Flags, + write_0_before_display_transfer: u32, + control: Control, + _unknown0: u32 = 0, + copy: Copy, +}; + +pub const Graphics = extern struct { + pub const AttributeIndex = enum(u4) { @"0", 
@"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", @"10", @"11" }; + pub const ArrayComponentIndex = enum(u4) { @"0", @"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", @"10", @"11" }; + + pub const Interrupt = extern struct { + pub const Mask = extern struct { + disabled_low: BitpackedArray(bool, 32), + disabled_high: BitpackedArray(bool, 32), + }; + + pub const Stat = extern struct { + match_low: BitpackedArray(bool, 32), + match_high: BitpackedArray(bool, 32), + }; + + /// 0x000 + ack: [64]u8, + /// 0x040 + req: [64]u8, + /// 0x080 + cmp: [64]u8, + /// 0x0C0 + mask: Mask, + /// 0x0C8 + stat: Stat, + /// 0x0D0 + // XXX: Does this really work? Specifying an invalid size + // (i.e. bigger than it actually is) will hang the GPU; even adding multiple + // req/finalize commands WILL hang the GPU (or GSP?)! This either means: + // - This doesn't work + // - Somehow something else has to be set? + autostop: LsbRegister(bool), + /// 0x0D4 + fixed_0x00010002: u32, + + comptime { + std.debug.assert(@sizeOf(Interrupt) == 0xD8); + } + }; + + pub const Rasterizer = extern struct { + pub const Mode = enum(u2) { normal, interlace, wireframe, normal_2 }; + pub const ClippingPlane = extern struct { + /// Enable the clipping plane + /// 0x11C + enable: LsbRegister(bool), + /// Coefficients of the clipping plane. + /// 0x120 + coefficients: [4]LsbRegister(F7_16), + }; + + pub const Statistics = extern struct { + /// 0x168 + vertices_received: u32, + /// 0x16C + triangles_received: u32, + /// 0x170 + triangles_displayed: u32, + }; + + pub const Scissor = extern struct { + /// 0x194 + mode: LsbRegister(ScissorMode), + /// The start of the scissor region, origin bottom-left. + /// 0x198 + start: [2]u16, + /// The end of the scissor region (inclusive), origin bottom-left. + /// 0x19C + end: [2]u16, + }; + + pub const InputMode = packed struct(u32) { + /// XXX: Textures still work when setting this to false...? 
+ use_texture_coordinates: bool = false, + _: u31 = 0, + }; + + pub const Clock = packed struct(u32) { + position_z: bool = false, + color: bool = false, + _unused0: u6 = 0, + texture_coordinates: BitpackedArray(bool, 3) = .splat(false), + _unused1: u5 = 0, + texture_coordinates_0_w: bool = false, + _unused2: u7 = 0, + normal_or_view: bool = false, + _unused3: u7 = 0, + }; + + pub const DepthMap = extern struct { + pub const Mode = enum(u1) { + /// Precision is evenly distributed. + w, + /// Precision is higher close to the near plane. + z, + }; + + /// Scale to map depth from [0, -1] to [0, 1]. + /// 0x134 + scale: LsbRegister(F7_16), + /// Bias to map depth from [0, -1] to [0, 1]. + /// 0x138 + bias: LsbRegister(F7_16), + }; + + pub const UndocumentedConfig0 = packed struct(u32) { + _unknown0: bool = false, + _unused0: u7 = 0, + /// Sometimes interlaces, sometimes skips pixels. + /// Depends on how the GPU feels that day. + dirty_interlace_skip: bool = false, + /// Weird, it "enables" some sort of wireframe in the rasterizer, + /// it does NOT convert triangles to lines as clipped triangles will + /// be correctly split; it is purely a rasterizer thing. + dirty_wireframe: bool, + _unused1: u22 = 0, + }; + + /// 0x100 + cull_config: LsbRegister(CullMode), + /// `Width / 2.0`, used for scaling vertex coordinates. + /// 0x104 + viewport_h_scale: LsbRegister(F7_16), + /// `2.0 / Width`, supposedly used for stepping colors and texture coordinates. + /// 0x108 + viewport_h_step: MsbRegister(F7_23), + /// `Height / 2.0`, used for scaling vertex coordinates. + /// 0x10C + viewport_v_scale: LsbRegister(F7_16), + /// `2.0 / Height`, supposedly used for stepping colors and texture coordinates. + /// 0x110 + viewport_v_step: MsbRegister(F7_23), + /// 0x114 + _unknown0: [2]u32, + /// Extra user-defined clipping plane. + /// 0x11C + extra_clipping_plane: ClippingPlane, + /// 0x130 + _unknown1: [1]u32, + /// Maps depth from NDC [0, -1] to framebuffer [0, 1]. 
+ /// 0x134 + depth_map: DepthMap, + /// 0x13C + num_inputs: LsbRegister(u3), + /// 0x140 + inputs: [7]OutputMap, + /// 0x15C + _unknown2: u32, + /// 0x160 + shader_output_map_qualifiers: u32, // According to GBATEK this allows you to use the flat qualifier in output colors. + /// 0x164 + _unknown3: u32, + /// 0x168 + statistics: Statistics, + /// 0x174 + _unknown4: [3]u32, + /// 0x180 + config: UndocumentedConfig0, + /// So early depth somehow has a separate internal buffer that must be cleared. + /// From tests it looks like it has MUCH LESS precision and literally breaks with anything, + /// 32x32 is needed. Overall, is it really needed? + /// + /// I don't know what engineers were smoking but it has too many false fails, discarding lots of fragments. + /// XXX: No more can be said, this is vaulted until new info comes out. + /// 0x184 + early_depth_function: LsbRegister(EarlyDepthCompareOperation), + /// 0x188 + early_depth_test_enable: LsbRegister(bool), + /// 0x18C + early_depth_clear: LsbRegister(Trigger), + /// 0x190 + input_mode: InputMode, + /// 0x194 + scissor: Scissor, + /// Viewport origin, origin is bottom-left. + /// 0x1A0 + viewport_xy: [2]u16, + /// 0x1A4 + _unknown8: u32, + /// 0x1A8 + early_depth_data: LsbRegister(u24), + /// 0x1AC + _unknown9: [2]u32, + /// 0x1B4 + depth_map_mode: LsbRegister(DepthMap.Mode), + /// Does not seem to have an effect but it's still documented like this + /// 0x1B8 + _unused_render_buffer_dimensions: u32, // XXX: Why would the rasterizer need output dimensions? + /// The clock driving inputs to the rasterizer from the shader. 
+ /// + /// If a shader outputs a value which the rasterizer doesn't clock, + /// the rasterizer reads a default value; e.g color will read (1, 1, 1, 1) + /// 0x1BC + input_clock: Clock, + }; + + pub const TextureUnits = extern struct { + pub const Config = packed struct(u32) { + texture_enabled: BitpackedArray(bool, 3), + _unused0: u5 = 0, + texture_3_coordinates: TextureUnitTexture3Coordinates, + texture_3_enabled: bool, + _unused1: u1 = 0, + _unused2: u1 = 1, + texture_2_coordinates: TextureUnitTexture2Coordinates, + _unused3: u2 = 0, + clear_texture_cache: bool, + _unused4: u15 = 0, + + pub fn format(cfg: Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {f}, Enable procedural: {} | T2 source: {}, T3 source: {} | Clear cache: {}", .{ + cfg.texture_enabled, + cfg.texture_3_enabled, + cfg.texture_2_coordinates, + cfg.texture_3_coordinates, + cfg.clear_texture_cache, + }); + } + }; + + pub const Parameters = packed struct(u32) { + pub const Etc1Flag = enum(u2) { none, etc1 = 2 }; + + _unknown0: u1 = 0, + mag_filter: TextureUnitFilter, + min_filter: TextureUnitFilter, + _unknown1: u1 = 0, + etc1: Etc1Flag, + _unknown2: u2 = 0, + address_mode_v: TextureUnitAddressMode, + _unknown3: u1 = 0, + address_mode_u: TextureUnitAddressMode, + _unknown4: u5 = 0, + is_shadow: bool, + _unknown5: u3 = 0, + mip_filter: TextureUnitFilter, + _unused0: u3 = 0, + type: TextureUnitType, + _unused1: u1 = 0, + + pub fn format(params: Parameters, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Type: {} | Min: {}, Mag: {}, Mip: {} | U: {}, V: {}", .{ + params.type, + params.min_filter, + params.mag_filter, + params.mip_filter, + params.address_mode_u, + params.address_mode_v, + }); + } + }; + + pub const LevelOfDetail = packed struct(u32) { + bias: Q4_8, + _unknown0: u3 = 0, + max_level_of_detail: u4, + _unknown1: u4 = 0, + min_level_of_detail: u4, + _unused0: u4 = 0, + }; + + pub const Shadow = packed struct(u32) { + orthogonal: bool, + // XXX: 
Documented as "the higher 23-bits of an UQ0.24": Bro, thats just an UQ0.23? + z_bias: UQ0_23, + _unknown0: u8 = 0, + }; + + pub const Primary = extern struct { + border_color: [4]u8, + /// Height and Width, NOT Width and Height! + dimensions: [2]u16, + parameters: Parameters, + lod: LevelOfDetail, + address: [6]AlignedPhysicalAddress(.@"8", .@"8"), + shadow: Shadow, + _unknown0: u32, + _unknown1: u32, + format: LsbRegister(TextureUnitFormat), + }; + + pub const Secondary = extern struct { + border_color: [4]u8, + /// Height and Width, NOT Width and Height! + dimensions: [2]u16, + /// WARNING: Type is ignored in secondary texture units, they're always 2d according to 3dbrew. + parameters: Parameters, + lod: LevelOfDetail, + address: AlignedPhysicalAddress(.@"8", .@"8"), + format: LsbRegister(TextureUnitFormat), + }; + + config: Config, + @"0": Primary, + lighting_enable: LsbRegister(bool), + _unknown0: u32, + @"1": Secondary, + _unknown1: [2]u32, + @"2": Secondary, + }; + + pub const ProceduralTextureUnit = extern struct { + pub const Main = extern struct { + procedural_texture: [5]u32, + procedural_texture_5_low: u32, + procedural_texture_5_high: u32, + }; + + @"3": Main, + lut_index: u32, + lut_data: [8]u32, + }; + + pub const TextureCombiners = extern struct { + pub const FogMode = enum(u3) { disabled, fog = 5, gas = 7 }; + pub const ShadingDensity = enum(u1) { plain, depth }; + pub const BufferSource = enum(u1) { previous_buffer, previous }; + pub const Multiplier = enum(u2) { @"1x", @"2x", @"4x" }; + pub const Source = enum(u4) { + primary_color, + fragment_primary_color, + fragment_secondary_color, + texture_0, + texture_1, + texture_2, + texture_3, + previous_buffer = 0xD, + constant, + previous, + }; + + pub const ColorFactor = enum(u4) { + src_color, + one_minus_src_color, + src_alpha, + one_minus_src_alpha, + src_red, + one_minus_src_red, + src_green = 8, + one_minus_src_green, + src_blue = 12, + one_minus_src_blue, + }; + + pub const AlphaFactor = 
enum(u3) { + src_alpha, + one_minus_src_alpha, + src_red, + one_minus_src_red, + src_green, + one_minus_src_green, + src_blue, + one_minus_src_blue, + }; + + pub const Operation = enum(u4) { + /// `src0` + replace, + /// `src0 * src1` + modulate, + /// `src0 + src1` + add, + /// `src0 + src1 - 0.5` + add_signed, + /// `src0 * src2 + src1 * (1 - src2)` + interpolate, + /// `src0 - src1` + subtract, + /// `4 * ((src0r − 0.5) * (src1r − 0.5) + (src0g − 0.5) * (src1g − 0.5) + (src0b − 0.5) * (src1b − 0.5))` + dot3_rgb, + /// `4 * ((src0r − 0.5) * (src1r − 0.5) + (src0g − 0.5) * (src1g − 0.5) + (src0b − 0.5) * (src1b − 0.5))` + dot3_rgba, + /// `src0 * src1 + src2` (?) + multiply_add, + /// `src0 + src1 * src2` (?) + add_multiply, + }; + + pub const Config = packed struct(u32) { + fog_mode: FogMode, + shading_density_source: ShadingDensity, + _unused0: u4 = 0, + combiner_color_buffer_src: BitpackedArray(BufferSource, 4), + combiner_alpha_buffer_src: BitpackedArray(BufferSource, 4), + z_flip: bool, + _unused1: u7 = 0, + _unknown0: u2 = 0, + _unused2: u6 = 0, + + pub const BufferIndex = enum(u3) { @"1", @"2", @"3", @"4" }; + + pub fn setColorBufferSource(update_buffer: *Config, index: BufferIndex, buffer_src: BufferSource) void { + std.mem.writePackedIntNative(u1, std.mem.asBytes(update_buffer), @as(usize, @bitOffsetOf(Config, "combiner_color_buffer_src")) + @intFromEnum(index), @intFromEnum(buffer_src)); + } + + pub fn setAlphaBufferSource(update_buffer: *Config, index: BufferIndex, buffer_src: BufferSource) void { + std.mem.writePackedIntNative(u1, std.mem.asBytes(update_buffer), @as(usize, @bitOffsetOf(Config, "combiner_alpha_buffer_src")) + @intFromEnum(index), @intFromEnum(buffer_src)); + } + + pub fn format(cfg: Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Fog Mode: {}, Shading Density: {}, Flip Z: {} | Color Buffer Src: {}, Alpha Buffer Src: {}", .{ + cfg.fog_mode, + cfg.shading_density_source, + cfg.z_flip, + cfg.combiner_color_buffer_src, 
+ cfg.combiner_alpha_buffer_src, + }); + } + }; + + pub const Unit = extern struct { + pub const Sources = packed struct(u32) { + color_src: BitpackedArray(Source, 3), + _unused0: u4 = 0, + alpha_src: BitpackedArray(Source, 3), + _unused1: u4 = 0, + }; + + pub const Factors = packed struct(u32) { + color_factor: BitpackedArray(ColorFactor, 3), + alpha_factor: BitpackedArray(AlphaFactor, 3), + _unused0: u11 = 0, + }; + + pub const Operations = packed struct(u32) { + color: Operation, + _unused0: u12 = 0, + alpha: Operation, + _unused1: u12 = 0, + + pub fn format(operations: Operations, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Color: {}, Alpha: {}", .{ + operations.color, + operations.alpha, + }); + } + }; + + pub const Scales = packed struct(u32) { + color: Multiplier, + _unused0: u14 = 0, + alpha: Multiplier, + _unused1: u14 = 0, + + pub fn format(scales: Scales, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Color: {}, Alpha: {}", .{ + scales.color, + scales.alpha, + }); + } + }; + + sources: Sources, + factors: Factors, + operations: Operations, + color: [4]u8, + scales: Scales, + }; + + pub const FogLutValue = packed struct(u24) { + next_difference: Q1_11, + value: UQ0_11, + }; + + /// 0x200 + @"0": Unit, + _unknown0: [3]u32, + @"1": Unit, + _unknown1: [3]u32, + @"2": Unit, + _unknown2: [3]u32, + @"3": Unit, + _unknown3: [3]u32, + config: Config, + fog_color: [4]u8, + _unknown4: [2]u32, + gas_attenuation: LsbRegister(F5_10), + gas_accumulation_max: LsbRegister(F5_10), + fog_lut_index: LsbRegister(u16), + _unknown5: u32, + fog_lut_data: [8]LsbRegister(FogLutValue), + @"4": Unit, + _unknown6: [3]u32, + @"5": Unit, + buffer_color: [4]u8, + }; + + pub const OutputMerger = extern struct { + pub const Pixel = DisplayController.Framebuffer.Pixel; + pub const Blend = enum(u1) { logic, blend }; + pub const Mode = enum(u2) { default, gas, shadow = 3 }; + pub const Interlace = enum(u1) { disable, even }; + pub const BlockSize = enum(u1) { 
@"8x8", @"32x32" }; + + pub const Config = packed struct(u32) { + mode: Mode, + _unused0: u6 = 0, + + blend: Blend, + _unused1: u7 = 0, + _unknown0: u8 = 0xE4, // (?) Not setting this doesn't have an effect visually (?) + /// Isn't this similar to interlace? I mean rendering each 2nd line is literally that. There should be something for odd lines maybe? + interlace: Interlace = .disable, + /// Can this be some sort of primitive discard or is literally render nothing? + disable_rendering: bool = false, + _unused2: u6 = 0, + }; + + pub const BlendConfig = packed struct(u32) { + color_op: BlendOperation, + _unused0: u5 = 0, + alpha_op: BlendOperation, + _unused1: u5 = 0, + src_color_factor: BlendFactor, + dst_color_factor: BlendFactor, + src_alpha_factor: BlendFactor, + dst_alpha_factor: BlendFactor, + }; + + pub const AlphaTest = packed struct(u32) { + enable: bool, + _unused0: u3 = 0, + op: CompareOperation, + _unused1: u1 = 0, + reference: u8, + _unused3: u16 = 0, + + pub fn format(cfg: AlphaTest, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {}, Operation: {}, Reference: {}", .{ + cfg.enable, + cfg.op, + cfg.reference, + }); + } + }; + + pub const StencilTest = extern struct { + pub const Config = packed struct(u32) { + enable: bool, + _unused0: u3 = 0, + op: CompareOperation, + _unused1: u1 = 0, + compare_mask: u8, + reference: u8, + write_mask: u8, + + pub fn format(cfg: StencilTest.Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {}, Operation: {}, Reference: {}, Compare Mask: 0x{X:0>2}, Write Mask: 0x{X:0>2}", .{ + cfg.enable, + cfg.op, + cfg.reference, + cfg.compare_mask, + cfg.write_mask, + }); + } + }; + + pub const Operation = packed struct(u32) { + fail: StencilOperation, + _unused0: u1 = 0, + depth_fail: StencilOperation, + _unused1: u1 = 0, + pass: StencilOperation, + _unused2: u21 = 0, + + pub fn format(op: StencilTest.Operation, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Fail: {}, 
Depth Fail: {}, Pass: {}", .{ + op.fail, + op.depth_fail, + op.pass, + }); + } + }; + + config: StencilTest.Config, + operation: StencilTest.Operation, + }; + + pub const DepthTestColorConfig = packed struct(u32) { + enable_depth_test: bool, + _unused0: u3 = 0, + depth_op: CompareOperation, + _unused1: u1 = 0, + enable_r_write: bool, + enable_g_write: bool, + enable_b_write: bool, + enable_a_write: bool, + enable_depth_write: bool, + _unused2: u19 = 0, + + pub fn format(cfg: DepthTestColorConfig, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Depth [Test: {}, Operation: {}, Write: {}] | Color {c}{c}{c}{c}", .{ + cfg.enable_depth_test, + cfg.depth_op, + cfg.enable_depth_write, + @as(u8, if (cfg.enable_r_write) 'r' else '_'), + @as(u8, if (cfg.enable_g_write) 'g' else '_'), + @as(u8, if (cfg.enable_b_write) 'b' else '_'), + @as(u8, if (cfg.enable_a_write) 'a' else '_'), + }); + } + }; + + pub const ColorAccess = enum(u4) { disable, all = 0xF }; + pub const DepthStencilAccess = enum(u2) { disable, stencil, depth, all }; + + pub const RenderBufferDimensions = packed struct(u32) { + width: u11, + _unused0: u1 = 0, + height_end: u10, + _unused1: u2 = 0, + flip_vertically: bool = false, + _unused2: u7 = 0, + + pub fn init(width: u11, height: u10, flip_vertically: bool) RenderBufferDimensions { + return .{ .width = width, .height_end = height - 1, .flip_vertically = flip_vertically }; + } + + pub fn format(dim: RenderBufferDimensions, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("{}x{} (flip: {})", .{ dim.width, @as(u32, dim.height_end) + 1, dim.flip_vertically }); + } + }; + + pub const ColorBufferFormat = packed struct(u32) { + pixel_size: Pixel.Size, + _unused0: u14 = 0, + pixel_format: Pixel, + _unused1: u13 = 0, + + pub fn init(pixel_format: Pixel) ColorBufferFormat { + return .{ + .pixel_size = pixel_format.pixelSize(), + .pixel_format = pixel_format, + }; + } + + pub fn format(fmt: ColorBufferFormat, w: *std.Io.Writer) 
std.Io.Writer.Error!void { + try w.print("{} ({} bits)", .{ fmt.pixel_format, fmt.pixel_size }); + } + }; + + config: Config, + blend_config: BlendConfig, + logic_config: LsbRegister(LogicOperation), + blend_color: [4]u8, + alpha_test: AlphaTest, + stencil_test: StencilTest, + depth_color_config: DepthTestColorConfig, + _unknown0: [8]u32, + invalidate: LsbRegister(Trigger), + /// Flushing without issuing a drawcall before-hand in the same submitted command queue + /// will hang the GPU + /// + /// Don't ask me why + flush: LsbRegister(Trigger), + color_read: LsbRegister(ColorAccess), + color_write: LsbRegister(ColorAccess), + depth_read: LsbRegister(DepthStencilAccess), + depth_write: LsbRegister(DepthStencilAccess), + depth_format: LsbRegister(DepthStencilFormat), + color_format: ColorBufferFormat, + early_depth_test_enable: LsbRegister(bool), + _unknown1: [2]u32, + block_size: LsbRegister(BlockSize), + depth_location: AlignedPhysicalAddress(.@"64", .@"8"), + color_location: AlignedPhysicalAddress(.@"64", .@"8"), + dimensions: RenderBufferDimensions, + _unknown2: u32, + gas_light_xy: u32, + gas_light_z: u32, + gas_light_z_color: u32, + gas_lut_index: u32, + gas_lut_data: u32, + _unknown3: u32, + gas_delta_z_depth: u32, + _unknown4: [9]u32, + fragment_operation_shadow: u32, + _unknown5: [15]u32, + }; + + /// Fragment lighting in the PICA200 is done primarily through 1D lookup tables and quaternion interpolation. + /// + /// The vertex shader (or geometry if used) must output a Quaternion representing the rotation from the z-axis + /// to the normal. This can be done in different ways, with the standard RotationFromTo(.{0, 0, 1}, Normal) or + /// the approach in the 'Shading by Quaternion Interpolation' paper. + /// + /// It must also output a View position that is optionally used for positional lights to calculate the + /// light vector, as directional lights are not affected by it. 
+ /// + /// There are 22 `LookupTable`s available: + /// - 2 distribution tables for specular: D0 and D1 + /// - 1 fresnel table: Fr + /// - 3 reflection tables for each color channel for reflection (D1): Rr, Rg and Rb + /// - 8 spotlight tables: Sp0 to Sp7 + /// - 8 distance attenuation tables: Da0 to Da7 + /// + /// The relevant lighting formulas are these (sources below): + /// Cp -> primary color, also called diffuse / Cs -> secondary color, also called specular + /// + /// Cp = ambient + foreach light ( Da*i*(*sd*) * Sp*i*(*in*) * H * (ambient*i* + diffuse*i* * f(L * N)) ) + /// + /// Cs = foreach light ( Da*i*(*sd*) * Sp*i*(*in*) * H * (specular*i*0**x** * D0(*in*) * G + specular*i*1**x** * R**x**(*in*) * D1(*in*) * G) ) + /// + /// where: + /// - H -> shadow attenuation factor + /// - *i* -> For light *i* + /// - **x** -> Color channel (r, g or b) + /// - *sd* -> Scaled distance, clip(`bias`*i* + `scale`*i* * distance, 0, 1) + /// - *in* -> One of the `LookupTable.Input`s + /// - G -> Geometric factor, when enabled is `(L * N) / lengthSqr(L + N)`, `1.0` otherwise + /// + /// Lookup tables (except Da) can have an input domain of [-1.0, 1.0] or [0.0, 1.0] depending on the `LookupTable.Absolute` flags. + /// Da always has an input domain of [0.0, 1.0]. The mapping of input to index is: + /// - [0.0, 1.0] -> [0, 255] + /// - [-1.0, 1.0] -> [0.0, 1.0] is [0, 127] and [-1.0, 0.0] is [128, 255] + /// + /// + /// With all of that, the PICA200 can do both PBR and NPBR, for example a Blinn-Phong shading model can be done with: + /// - D0 enabled (absolute) with input N * H where each entry is `(N * H)^s` and `s` is the *shininess* of the surface. + /// + /// Sources: + /// - 3dbrew + /// - 'Primitive Processing and Advanced Shading Architecture for Embedded Space' by Max Kazakov & Eisaku Ohbuchi. + /// - Both slides and paper are useful! + /// - 'A Real-Time Configurable Shader Based on Lookup Tables' by Eisaku Ohbuchi & Hiroshi Unno. 
+ /// - Warning: Paywalled, you must pay or access it through an institution (e.g: university) + /// - 'Shading by Quaternion Interpolation' by Anders Hast. + pub const FragmentLighting = extern struct { + pub const Color = packed struct(u32) { + b: u8, + _unused0: u2 = 0, + g: u8, + _unused1: u2 = 0, + r: u8, + _unused2: u4 = 0, + + pub fn init(r: u8, g: u8, b: u8) Color { + return .{ .r = r, .g = g, .b = b }; + } + + pub fn initBuffer(rgb: [3]u8) Color { + return .init(rgb[0], rgb[1], rgb[2]); + } + + pub fn splat(v: u8) Color { + return .init(v, v, v); + } + + pub fn format(color: Color, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("({}, {}, {})", .{ + color.r, + color.g, + color.b, + }); + } + }; + + pub const FresnelSelector = enum(u2) { none, primary, secondary, both }; + pub const LookupTable = enum(u5) { + pub const Enabled = enum(u4) { + d0_rr_sp_da, + fr_rr_sp_da, + d0_d1_rr_da, + d0_d1_fr_da, + d0_d1_rx_sp_da, + d0_fr_rx_sp_da, + d0_d1_fr_rr_sp_da, + all = 8, + }; + + pub const Index = packed struct(u32) { + index: u8, + table: LookupTable, + _unused0: u19 = 0, + + pub fn init(table: LookupTable, index: u8) Index { + return .{ .table = table, .index = index }; + } + + pub fn format(idx: Index, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("{}[{d}]", .{ + idx.table, + idx.index, + }); + } + }; + + pub const Absolute = packed struct(u32) { + _unused0: u1 = 0, + disable_d0: bool = false, + _unused1: u3 = 0, + disable_d1: bool = false, + _unused2: u3 = 0, + disable_sp: bool = false, + _unused3: u3 = 0, + disable_fr: bool = false, + _unused4: u3 = 0, + disable_rb: bool = false, + _unused5: u3 = 0, + disable_rg: bool = false, + _unused6: u3 = 0, + disable_rr: bool = false, + _unused7: u6 = 0, + + pub fn format(absolute: Absolute, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{ + !absolute.disable_d0, + !absolute.disable_d1, + !absolute.disable_sp, + 
!absolute.disable_fr, + !absolute.disable_rr, + !absolute.disable_rg, + !absolute.disable_rb, + }); + } + }; + + pub const Input = enum(u3) { @"N * H", @"V * H", @"N * V", @"L * N", @"-L * P", @"cos(phi)" }; + pub const Select = packed struct(u32) { + d0: Input = .@"N * H", + _unused0: u1 = 0, + d1: Input = .@"N * H", + _unused1: u1 = 0, + sp: Input = .@"N * H", + _unused2: u1 = 0, + fr: Input = .@"N * H", + _unused3: u1 = 0, + rb: Input = .@"N * H", + _unused4: u1 = 0, + rg: Input = .@"N * H", + _unused5: u1 = 0, + rr: Input = .@"N * H", + _unused6: u5 = 0, + + pub fn format(select: Select, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{ + select.d0, + select.d1, + select.sp, + select.fr, + select.rr, + select.rg, + select.rb, + }); + } + }; + + pub const Multiplier = enum(u3) { @"1x", @"2x", @"4x", @"8x", @"0.25x" = 6, @"0.5x" }; + pub const Scale = packed struct(u32) { + d0: Multiplier = .@"1x", + _unused0: u1 = 0, + d1: Multiplier = .@"1x", + _unused1: u1 = 0, + sp: Multiplier = .@"1x", + _unused2: u1 = 0, + fr: Multiplier = .@"1x", + _unused3: u1 = 0, + rb: Multiplier = .@"1x", + _unused4: u1 = 0, + rg: Multiplier = .@"1x", + _unused5: u1 = 0, + rr: Multiplier = .@"1x", + _unused6: u5 = 0, + + pub fn format(scale: Scale, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{ + scale.d0, + scale.d1, + scale.sp, + scale.fr, + scale.rr, + scale.rg, + scale.rb, + }); + } + }; + + pub const Data = packed struct(u32) { + entry: UQ0_12, + next_absolute_difference: Q0_11, + _unused0: u8 = 0, + + // TODO: initBuffer + + /// Builds a 256-entry table by sampling `context.value(input)` once per entry. + /// When `absolute` is true, entry `i` is sampled at `i / 256`; otherwise entries 0..127 are + /// sampled at `i / 128` and entries 128..255 at `(i - 256) / 128`. + /// + /// NOTE(review): the loop stores `@abs(current - last)` as the difference, but the final + /// entry (255) stores the signed difference; confirm which of the two is intended. + /// NOTE(review): when `absolute` is false, entry 127 takes its difference against the sample + /// at -1.0 (entry 128) rather than +1.0; confirm this matches what the hardware interpolates towards. + pub fn initContext( + context: anytype, + absolute: bool, + ) [256]Data { + var lut: [256]Data = undefined; + + const absolute_unit: f32 = @floatFromInt(@intFromBool(absolute)); + const negated_unit = 1 - absolute_unit; + + const msb_multiplier: f32 = (absolute_unit * 2 - 1) * 128; + const max = 256.0 - (negated_unit 
* 128.0); + + var last: f32 = context.value(0.0); + for (1..lut.len) |i| { + const input = (@as(f32, @floatFromInt(i & 0x7F)) + @as(f32, @floatFromInt((i >> 7) & 0b1)) * msb_multiplier) / max; + + const current: f32 = context.value(input); + defer last = current; + + lut[i - 1] = .{ + .entry = .ofSaturating(last), + .next_absolute_difference = .ofSaturating(@abs(current - last)), + }; + } + + lut[255] = .{ .entry = .ofSaturating(last), .next_absolute_difference = .ofSaturating(context.value(absolute_unit) - last) }; + return lut; + } + + pub fn format(data: Data, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Entry: {}, Diff: {}", .{ data.entry, data.next_absolute_difference }); + } + }; + + // zig fmt: off + d0, d1, + fr = 3, + rb, rg, rr, + sp0 = 8, sp1, sp2, sp3, sp4, sp5, sp6, sp7, + da0, da1, da2, da3, da4, da5, da6, da7, + // zig fmt: on + }; + + pub const BumpMode = enum(u2) { none, bump, tangent }; + + pub const Control = extern struct { + pub const Environment = packed struct(u32) { + enable_shadow_factor: bool, + _unused0: u1 = 0, + fresnel: FresnelSelector, + enabled_lookup_tables: LookupTable.Enabled, + _unknown0: u4 = 0x4, + _unused1: u4 = 0, + apply_shadow_attenuation_to_primary_color: bool, + apply_shadow_attenuation_to_secondary_color: bool, + invert_shadow_attenuation: bool, + apply_shadow_attenuation_to_alpha: bool, + _unused2: u2 = 0, + bump_map_unit: TextureUnit, + /// BIG BRAIN TIME, lets configure the shadow map unit........ + /// Only unit 0 supports them? 
+ shadow_map_unit: TextureUnit, + _unused3: u1 = 0, + clamp_highlights: bool, + bump_mode: BumpMode, + disable_bump_recalculation: bool, + _unknown1: u1 = 0x1, + + pub fn format(env: Environment, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enabled Tables: {}, Fresnel: {}, Shadow: {} | Shadow to primary: {}, Shadow to secondary: {}, Shadow to alpha: {}, Invert Shadow: {} | Bump Unit: {}, Shadow Unit: {} | Clamp Highlights: {}, Bump Mode: {}, Bump Recalculation: {}", .{ + env.enabled_lookup_tables, + env.fresnel, + env.enable_shadow_factor, + env.apply_shadow_attenuation_to_primary_color, + env.apply_shadow_attenuation_to_secondary_color, + env.apply_shadow_attenuation_to_alpha, + env.invert_shadow_attenuation, + env.bump_map_unit, + env.shadow_map_unit, + env.clamp_highlights, + env.bump_mode, + !env.disable_bump_recalculation, + }); + } + }; + + pub const Lights = packed struct(u32) { + shadows_disabled: BitpackedArray(bool, 8), + spotlight_disabled: BitpackedArray(bool, 8), + disable_d0: bool, + disable_d1: bool, + _unknown0: u1 = 0x1, + disable_fr: bool, + disable_rb: bool, + disable_rg: bool, + disable_rr: bool, + _unknown1: u1 = 0x1, + distance_attenuation_disabled: BitpackedArray(bool, 8), + + pub fn format(lights: Lights, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, FR: {}, RR: {}, RG: {}, RB: {} | Shadow Disabled: {f}, Spotlight Disabled: {f}, Distance Attenuation Disabled: {f}", .{ + !lights.disable_d0, + !lights.disable_d1, + !lights.disable_fr, + !lights.disable_rr, + !lights.disable_rg, + !lights.disable_rb, + lights.shadows_disabled, + lights.spotlight_disabled, + lights.distance_attenuation_disabled, + }); + } + }; + + environment: Environment, + lights: Lights, + }; + + pub const Light = extern struct { + pub const Id = enum(u4) { + _, + + pub fn init(value: u3) Id { + return @enumFromInt(value); + } + }; + + pub const Type = enum(u1) { positional, directional }; + pub const DiffuseSides = enum(u1) { 
one, both }; + + pub const Factors = extern struct { + specular: [2]Color, + diffuse: Color, + ambient: Color, + }; + + pub const Parameters = extern struct { + /// Its `xy` position if positional, otherwise its `xy` direction (unitary). + /// + /// If it is a directional light, the direction vector is Object -> Light, + xy: F5_10x2, + /// Its `z` position if positional, otherwise its `z` direction (unitary). + z: LsbRegister(F5_10), + /// Its `xy` spot (for spotlights) direction (unitary). + spot_xy: Q1_11x2, + /// Its `z` spot (for spotlights) direction (unitary). + spot_z: LsbRegister(Q1_11), + }; + + pub const Config = packed struct(u32) { + type: Type, + diffuse_sides: DiffuseSides, + geometric_factor_enable: BitpackedArray(bool, 2), + _unused0: u28 = 0, + }; + + pub const Attenuation = extern struct { + bias: LsbRegister(F7_12), + scale: LsbRegister(F7_12), + }; + + /// Color factors for primary and secondary colors. + factors: Factors, + parameters: Parameters, + _unknown0: u32, + config: Config, + /// Distance attenuation coefficients for the lookup input: + /// + /// `DA(clamp(distance * scale + bias, 0.0, 1.0))` + attenuation: Attenuation, + }; + + light: [8]Light, + _unknown0: [32]u32, + /// Scene/Global ambient color. + ambient: Color, + _unknown1: u32, + /// Number of active lights minus one. + num_lights_min_one: LsbRegister(u3), + control: Control, + lut_index: LookupTable.Index, + disable: LsbRegister(bool), + _unknown2: u32, + lut_data: [8]LookupTable.Data, + lut_input_absolute: LookupTable.Absolute, + lut_input_select: LookupTable.Select, + lut_input_scale: LookupTable.Scale, + _unknown3: [6]u32, + /// Maps enabled light index to its configuration. e.g: you can have 3 lights enabled but have those 3 lights be 0, 4 and 7 for example. 
+ light_permutation: BitpackedArray(Light.Id, 8), + }; + + pub const PrimitiveEngine = extern struct { + pub const Mode = enum(u1) { drawing, config }; + + pub const PrimitiveConfig = packed struct(u32) { + total_vertex_outputs: u4, + _unused0: u4 = 0, + topology: PrimitiveTopology, + _unused1: u6 = 0, + _unknown0: u1 = 0, + _unused2: u15 = 0, + + pub fn format(cfg: PrimitiveConfig, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Topology: {} | Vertex Outputs: {}", .{ + cfg.topology, + cfg.total_vertex_outputs, + }); + } + }; + + pub const PipelineConfig = packed struct(u32) { + pub const GeometryUsage = enum(u2) { disabled, enabled = 2 }; + + geometry_shader_usage: GeometryUsage = .disabled, + _unused0: u6 = 0, + drawing_triangles: bool = false, + _unknown0: u1 = 0, + _unused1: u6 = 0, + _unknown1: u4 = 0, + _unused2: u11 = 0, + variable_geometry_inputs: bool = false, + }; + + pub const State = packed struct(u32) { + inputting_vertices_or_draw_arrays: bool = false, + _unused0: u7 = 0, + drawing_triangles: bool = false, + _unused1: u23 = 0, + }; + + pub const GeometryShader = packed struct(u32) { + pub const Mode = enum(u2) { + /// Vertex shader outputs begin filling geometry shader inputs (`point_inputs_minus_one`) until all slots are filled, + /// in which case a geometry shader invocation is performed. + point, + /// Vertex shader outputs are buffered into uniform registers starting at `f1`, + /// `f0` stores the number of vertices in the batch. + /// + /// All drawcalls must be indexed while using this geometry shader mode. + /// + /// The first index signifies how many vertices to process, two kinds of vertices + /// are batched: main and secondary vertices; main vertices passthrough all outputs + /// while secondary ones only the first. + variable, + /// Vertex shader outputs are buffered into geometry shader uniform registers (up to `fixed_vertices_minus_one`) starting + /// at `uniform_start`. 
+ fixed, + }; + + mode: GeometryShader.Mode, + _unused0: u6 = 0, + fixed_vertices_minus_one: u4, + point_inputs_minus_one: u4, + uniform_start: shader.register.Source.Constant, + _unused1: u1 = 0, + /// Unknown, but it is said that must be set when mode is fixed in both 3dbrew and GBATEK. + fixed: bool, + _unused2: u7 = 0, + }; + + pub const Attribute = extern struct { + pub const Format = packed struct(u4) { + pub const i8x1: Format = .{ .type = .i8, .size = .x }; + pub const i8x2: Format = .{ .type = .i8, .size = .xy }; + pub const i8x3: Format = .{ .type = .i8, .size = .xyz }; + pub const i8x4: Format = .{ .type = .i8, .size = .xyzw }; + + pub const u8x1: Format = .{ .type = .u8, .size = .x }; + pub const u8x2: Format = .{ .type = .u8, .size = .xy }; + pub const u8x3: Format = .{ .type = .u8, .size = .xyz }; + pub const u8x4: Format = .{ .type = .u8, .size = .xyzw }; + + pub const i16x1: Format = .{ .type = .i16, .size = .x }; + pub const i16x2: Format = .{ .type = .i16, .size = .xy }; + pub const i16x3: Format = .{ .type = .i16, .size = .xyz }; + pub const i16x4: Format = .{ .type = .i16, .size = .xyzw }; + + pub const f32x1: Format = .{ .type = .f32, .size = .x }; + pub const f32x2: Format = .{ .type = .f32, .size = .xy }; + pub const f32x3: Format = .{ .type = .f32, .size = .xyz }; + pub const f32x4: Format = .{ .type = .f32, .size = .xyzw }; + + pub const Type = enum(u2) { + i8, + u8, + i16, + f32, + + pub fn byteSize(typ: Type) usize { + return switch (typ) { + .i8, .u8 => @sizeOf(u8), + .i16 => @sizeOf(i16), + .f32 => @sizeOf(f32), + }; + } + }; + + pub const Size = enum(u2) { x, xy, xyz, xyzw }; + + type: Type = .i8, + size: Size = .x, + + pub fn byteSize(fmt: Format) usize { + return fmt.type.byteSize() * (@as(usize, @intFromEnum(fmt.size)) + 1); + } + }; + + pub const Config = extern struct { + pub const Flags = enum(u1) { array, fixed }; + + pub const Low = packed struct(u32) { attributes: BitpackedArray(Format, 8) = .splat(.{}) }; + pub const High = 
packed struct(u32) { + remaining_attributes: BitpackedArray(Format, 4) = .splat(.{}), + flags: BitpackedArray(Flags, 12) = .splat(.array), + attributes_end: u4, + }; + + low: Low, + high: High, + + pub fn setAttribute(config: *Config, index: AttributeIndex, value: Format) void { + std.mem.writePackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(Format), @bitCast(value), .little); + } + + pub fn getAttribute(config: *const Config, index: AttributeIndex) Format { + return @bitCast(std.mem.readPackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(Format), .little)); + } + + pub fn setFlag(config: *Config, index: AttributeIndex, value: Flags) void { + std.mem.writePackedInt(u1, std.mem.asBytes(config), (@as(usize, 12) * @bitSizeOf(Format)) + @intFromEnum(index) * @bitSizeOf(Flags), @intFromEnum(value), .little); + } + + pub fn getFlag(config: *const Config, index: AttributeIndex) Flags { + return @enumFromInt(std.mem.readPackedInt(u1, std.mem.asBytes(config), (@as(usize, 12) * @bitSizeOf(Format)) + @intFromEnum(index) * @bitSizeOf(Flags), .little)); + } + }; + + pub const VertexBuffer = extern struct { + pub const Config = extern struct { + pub const ArrayComponent = enum(u4) { + attribute_0, + attribute_1, + attribute_2, + attribute_3, + attribute_4, + attribute_5, + attribute_6, + attribute_7, + attribute_8, + attribute_9, + attribute_10, + attribute_11, + + padding_4, + padding_8, + padding_12, + padding_16, + }; + + pub const Low = packed struct(u32) { + components: BitpackedArray(ArrayComponent, 8) = .init(.{ + .attribute_0, + .attribute_1, + .attribute_2, + .attribute_3, + .attribute_4, + .attribute_5, + .attribute_6, + .attribute_7, + }), + }; + + pub const High = packed struct(u32) { + components: BitpackedArray(ArrayComponent, 4) = .init(.{ + .attribute_8, + .attribute_9, + .attribute_10, + .attribute_11, + }), + + bytes_per_vertex: u8, + _unused0: u4 = 0, + num_components: u4, + }; + + low: Low, + 
high: High, + + pub fn setComponent(config: *VertexBuffer.Config, index: ArrayComponentIndex, value: ArrayComponent) void { + std.mem.writePackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(ArrayComponent), @intFromEnum(value), .little); + } + + pub fn getComponent(config: *const VertexBuffer.Config, index: ArrayComponentIndex) ArrayComponent { + return @enumFromInt(std.mem.readPackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(ArrayComponent), .little)); + } + }; + + offset: LsbRegister(u28), + config: VertexBuffer.Config, + }; + + pub const IndexBuffer = packed struct(u32) { + offset: u28, + _unused0: u3 = 0, + fmt: IndexFormat, + + pub fn init(base_offset: u28, fmt: IndexFormat) IndexBuffer { + return .{ .offset = base_offset, .fmt = fmt }; + } + + pub fn format(idx: IndexBuffer, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("0x{X} ({t})", .{ + idx.offset, + idx.fmt, + }); + } + }; + + base: AlignedPhysicalAddress(.@"16", .@"8"), + config: Config, + vertex_buffers: [12]VertexBuffer, + index_buffer: IndexBuffer, + }; + + pub const FixedAttribute = extern struct { + pub const Index = packed struct(u32) { + /// Begin immediate submission of vertex attributes. + pub const immediate: Index = .{ .index = 0xF }; + + index: u4, + _: u28 = 0, + + pub fn register(input: u4) Index { + std.debug.assert(input < 12); + return .{ .index = input }; + } + }; + + /// If `Index.immediate` the written `value`s will begin filling shader inputs + /// and drawing primitives. Otherwise it is an index representing the attribute + /// whose `value` will be set. + index: Index, + + /// The value to write to a shader input or attribute. + value: F7_16x4, + }; + + pub const CommandBuffer = extern struct { + /// Shifted to the left by 3. 
+ size: [2]LsbRegister(u22), + address: [2]AlignedPhysicalAddress(.@"16", .@"8"), + jump: [2]LsbRegister(Trigger), + }; + + /// Attribute info used when issuing drawcalls via `draw` or `draw_indexed`. + attributes: Attribute, + /// The amount of vertices that will be processed by a drawcall. + draw_vertex_count: u32, + config: PipelineConfig, + /// The first index used by drawcalls. Only used in `draw`, ignored by `draw_indexed`. + draw_first_index: u32, + _unknown0: [2]u32, + post_vertex_cache: LsbRegister(u8), + /// Triggers a non-indexed drawcall, will begin reading from `draw_first_index` + /// until `draw_vertex_count` vertices are processed. + draw: LsbRegister(Trigger), + /// Triggers an indexed drawcall, + draw_indexed: LsbRegister(Trigger), + _unknown1: u32, + clear_post_vertex_cache: LsbRegister(Trigger), + fixed_attribute: FixedAttribute, + _unknown2: [2]u32, + command_buffer: CommandBuffer, + _unknown3: [4]u32, + vertex_shader_input_attributes: LsbRegister(u4), + _unknown4: u32, + /// updates to the vertex shader unit will not be propagated to the geometry shader if true. + exclusive_shader_configuration: LsbRegister(bool), + mode: LsbRegister(Mode), + _unknown5: [4]u32, + vertex_shader_output_map_total_2: LsbRegister(u4), + _unknown6: [6]u32, + vertex_shader_output_map_total_1: LsbRegister(u4), + geometry_shader: GeometryShader, + state: State, + geometry_shader_full_vertices_minus_one: LsbRegister(u5), + _unknown7: u32, + _unknown8: [8]u32, + primitive_config: PrimitiveConfig, + restart_primitive: LsbRegister(Trigger), + }; + + pub const Shader = extern struct { + pub const Entry = packed struct(u32) { + entry: u16, + _: u16 = 0x7FFF, + + pub fn initEntry(entry: u16) Entry { + return .{ .entry = entry }; + } + }; + + pub const Input = packed struct(u32) { + /// Amount of input registers + inputs: u4, + _unused0: u4 = 0, + /// When true, inputs will fill uniform registers instead of input ones. + /// Used for variable and fixed geometry shader modes. 
+ uniform: bool = false, + _unused1: u18 = 0, + enabled_for_geometry_0: bool = false, + _unknown0: u1 = 0, + enabled_for_vertex_0: bool = false, + _unused2: u1 = 0, + enabled_for_vertex_1: bool = false, + + pub fn format(input: Input, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Inputs: {d} | Uniform: {}", .{ + input.inputs, + input.uniform, + }); + } + }; + + pub const FloatUniformConfig = packed struct(u32) { + pub const Mode = enum(u1) { f7_16, f8_23 }; + + index: FloatConstantRegister, + _unused0: u24 = 0, + mode: Mode, + + pub fn format(cfg: FloatUniformConfig, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("{d} ({t})", .{ + cfg.index, + cfg.mode, + }); + } + }; + + pub const AttributePermutation = extern struct { + pub const Low = packed struct(u32) { + attributes: BitpackedArray(InputRegister, 8) = .init(.{ .v0, .v1, .v2, .v3, .v4, .v5, .v6, .v7 }), + }; + + pub const High = packed struct(u32) { + remaining_attribute: BitpackedArray(InputRegister, 8) = .init(.{ .v8, .v9, .v10, .v11, .v12, .v13, .v14, .v15 }), + }; + + low: Low = .{}, + high: High = .{}, + + /// Stores `value` in the slot selected by `index`; each slot is `@bitSizeOf(InputRegister)` bits wide and slots are packed from bit 0 of `low`. + /// + /// The index is widened to `usize` before scaling so the bit offset is not computed in the enum's (possibly narrow) tag type. + pub fn setAttribute(config: *AttributePermutation, index: AttributeIndex, value: InputRegister) void { + std.mem.writePackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(InputRegister), @intFromEnum(value), .little); + } + + /// Returns the input register currently stored in the slot selected by `index`. + /// + /// Takes a `*const` receiver (read-only access), matching the other `get*` helpers in this file. + pub fn getAttribute(config: *const AttributePermutation, index: AttributeIndex) InputRegister { + return @enumFromInt(std.mem.readPackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(InputRegister), .little)); + } + }; + + pub const BooleanUniformMask = packed struct(u32) { + mask: BitpackedArray(bool, 16), + _unused0: u16 = 0x7FFF, + + pub fn init(mask: BitpackedArray(bool, 16)) BooleanUniformMask { + return .{ .mask = mask }; + } + }; + + bool_uniforms: BooleanUniformMask, + int_uniforms: [4][4]u8, + _unused0: [4]u32, + input: Input, + entrypoint: Entry, + attribute_permutation: AttributePermutation, + output_map_mask: 
LsbRegister(BitpackedArray(bool, 16)), + _unused1: u32, + code_transfer_end: LsbRegister(Trigger), + float_uniform_index: FloatUniformConfig, + float_uniform_data: [8]u32, + _unused2: [2]u32, + code_transfer_index: LsbRegister(u12), + code_transfer_data: [8]Instruction, + _unused3: u32, + operand_descriptors_index: LsbRegister(u7), + operand_descriptors_data: [8]OperandDescriptor, + }; + + /// 0x000 + irq: Interrupt, + /// 0x0D8 + _unused0: [40]u8, + /// 0x100 + rasterizer: Rasterizer, + /// 0x1C0 + _unused1: [64]u8, + /// 0x200 + texture_units: TextureUnits, + _unused2: [36]u8, + /// 0x2A0 + procedural_texture_unit: ProceduralTextureUnit, + _unused3: [32]u8, + /// 0x300 + texture_combiners: TextureCombiners, + _unused4: [8]u8, + /// 0x400 + output_merger: OutputMerger, + /// 0x500 + fragment_lighting: FragmentLighting, + _unused5: [152]u8, + /// 0x800 + primitive_engine: PrimitiveEngine, + _unused6: [128]u8, + /// 0xA00 + geometry_shader: Shader, + _unused7: [8]u8, + /// 0xAC0 + vertex_shader: Shader, + + comptime { + std.debug.assert(@offsetOf(Graphics, "irq") == 0x000); + std.debug.assert(@offsetOf(Graphics, "rasterizer") == 0x100); + std.debug.assert(@offsetOf(Graphics, "texture_units") == 0x200); + std.debug.assert(@offsetOf(Graphics, "procedural_texture_unit") == 0x2A0); + std.debug.assert(@offsetOf(Graphics, "texture_combiners") == 0x300); + std.debug.assert(@offsetOf(Graphics, "output_merger") == 0x400); + std.debug.assert(@offsetOf(Graphics, "fragment_lighting") == 0x500); + std.debug.assert(@offsetOf(Graphics, "primitive_engine") == 0x800); + std.debug.assert(@offsetOf(Graphics, "geometry_shader") == 0xA00); + std.debug.assert(@offsetOf(Graphics, "vertex_shader") == 0xAC0); + } +}; + +/// There are 3 main points where LCDs are configured: +/// * I2C (TODO: I2C in general) +/// * LCD registers (`zitrus.hardware.lcd`) +/// * These +/// +/// The GPU `DisplayController` is what drives the LCD, pushing pixels and is in charge +/// of the timing of each display 
(along with the IRQs). +/// +/// With these, looks like it's possible to: +/// * Change the Hz of the display itself +/// * Change the size of the displayed area, display it anywhere and set a +/// configurable border color in the non-displayed area. +/// +/// It also looks like the pixel clock is the main clock divided by 24, i.e `268111856 / 24` +/// +/// All of this has been synthetized from 3dbrew and GBATEK, both sources have wildly different +/// register naming but as everywhere else, the naming is different and reflects what I've seen +/// and think. +/// +/// WARNING: Modifying these registers CAN damage hardware in some LCDs: o3DS (burn-in) and IPS (new ones). +/// Thanks to @sono3 in the godmode9 discord who told me this. +/// Use the decls in `Preset` as those values are taken from what OFW use. +pub const DisplayController = extern struct { + pub const Color = packed struct(u32) { + r: u8, + g: u8, + b: u8, + _unused0: u8 = 0, + }; + + pub const Framebuffer = extern struct { + pub const Control = packed struct(u32) { + enable: bool, + _unused0: u7 = 0, + disable_horizontal_sync_irq: bool, + disable_vertical_sync_irq: bool, + disable_error_irq: bool, + _unused1: u5 = 0, + maybe_output_enable: bool = true, + _unused2: u15 = 0, + }; + + pub const Select = packed struct(u32) { + select: u1, + _unused0: u3 = 0, + current: u1 = 0, + _unused1: u3 = 0, + reset_fifo: bool = false, + _unused2: u7 = 0, + horizontal_ack: bool = false, + vertical_ack: bool = false, + error_ack: bool = false, + _unused3: u13 = 0, + }; + + pub const Status = packed struct(u32) { + horizontal_irq: bool, + vertical_irq: bool, + _unused0: u2 = 0, + bit: bool, + _unused1: u3 = 0, + horizontal_sync: bool, + horizontal_blank: bool, + horizontal_drawing: bool, + _unused2: u1 = 0, + vertical_sync: bool, + vertical_blank: bool, + vertical_drawing: bool, + _unknown0: bool, + _unused3: u16 = 0, + }; + + pub const Pixel = enum(u3) { + pub const Size = enum(u2) { + @"16", + @"24", + @"32", + _, + 
}; + + pub const Abgr8888 = extern struct { a: u8, b: u8, g: u8, r: u8 }; + pub const Bgr888 = extern struct { b: u8, g: u8, r: u8 }; + pub const Rgb565 = packed struct(u16) { b: u5, g: u6, r: u5 }; + pub const Rgba5551 = packed struct(u16) { a: u1, b: u5, g: u5, r: u5 }; + pub const Rgba4444 = packed struct(u16) { a: u4, b: u4, g: u4, r: u4 }; + + /// 4 bytes, `A B G R`. + abgr8888, + /// 3 bytes, `B G R`. + bgr888, + /// Packed, 2 bytes, `RRRRRGGGGGGBBBBB`. + rgb565, + /// Packed, 2 bytes, `RRRRRGGGGGBBBBBA`. + rgba5551, + /// Packed, 2 bytes, `RRRRGGGGBBBBAAAA`. + rgba4444, + + pub inline fn Data(comptime format: Pixel) type { + return switch (format) { + .abgr8888 => Abgr8888, + .bgr888 => Bgr888, + .rgb565 => Rgb565, + .rgba5551 => Rgba5551, + .rgba4444 => Rgba4444, + }; + } + + pub fn pixelSize(format: Pixel) Size { + return switch (format.bytesPerPixel()) { + 2 => .@"16", + 3 => .@"24", + 4 => .@"32", + else => unreachable, + }; + } + + pub fn bytesPerPixel(format: Pixel) usize { + return switch (format) { + inline else => |f| @sizeOf(f.Data()), + }; + } + + pub fn components(format: Pixel) usize { + return switch (format) { + inline else => |f| @typeInfo(f.Data()).@"struct".fields.len, + }; + } + }; + + pub const Interlacing = enum(u2) { + none, + scanline_doubling, + enable, + enable_inverted, + }; + + pub const Dma = enum(u2) { + @"32", + @"64", + @"128", + vram, + }; + + pub const Format = packed struct(u32) { + pixel_format: Pixel, + _unused0: u1 = 0, + interlacing: Interlacing, + /// Should only be used on the top screen. + /// + /// Makes the display controller reuse the same fetched pixel twice. + /// + /// Halves the pixel rate (?) 
+ /// + /// NOTE: this is just synthetized from testing and above docs, still needs more testing + half_rate: bool, + _unused1: bool = false, + dma_size: Dma, + _unused2: u6 = 0, + unknown0: u16 = 8, + }; + + left_address: [2]AlignedPhysicalAddress(.@"16", .@"1"), + format: Format, + control: Control, + select: Select, + status: Status, + color_lookup_index: LsbRegister(u8), + color_lookup_data: Color, + _unused0: [2]u32, + stride: u32, + right_address: [2]AlignedPhysicalAddress(.@"16", .@"1"), + }; + + pub const SynchronizationPolarity = packed struct(u32) { + horizontal_active_high: bool, + _unused0: u3 = 0, + vertical_active_high: bool, + _unused1: u27 = 0, + }; + + /// Total = Back Porch Start -> Back Porch Mid (Left/Upper Border Start) -> Display Start/Back Porch End + /// -> Front Porch Start (Right/Lower Border End) -> Front Porch Mid (Supposedly Bugged) -> Front Porch End -> IRQ + /// + /// The LCDs are sensitive, look at the comment in `DisplayController`. + pub const Timing = extern struct { + pub const Display = packed struct(u32) { + back_porch_mid: u12, + _unused0: u4 = 0, + front_porch_start: u12, + _unused1: u4 = 0, + }; + + pub const Range = packed struct(u32) { + start: u12, + _unused0: u4 = 0, + end: u12, + _unused1: u4 = 0, + }; + + /// 0x00 + total: LsbRegister(u12), + /// 0x04 + back_porch_end: LsbRegister(u12), + /// 0x08 + front_porch_mid: LsbRegister(u12), + /// 0x0C + front_porch_end: LsbRegister(u12), + /// 0x10 + sync_start: LsbRegister(u12), + /// 0x14 + sync_end: LsbRegister(u12), + /// 0x18 + back_porch_start: LsbRegister(u12), + /// 0x1C + interrupt: Range, + /// 0x20 + unknown: u32, + + comptime { + std.debug.assert(@sizeOf(Timing) == 0x24); + } + }; + + pub const DisplaySize = packed struct(u32) { + width: u12, + _unused0: u4 = 0, + height: u12, + _unused1: u4 = 0, + }; + + pub const LatchingPoint = packed struct(u32) { + horizontal: u12, + _unused0: u4 = 0, + vertical: u12, + _unused1: u4 = 0, + }; + + pub const Preset = struct { + 
/// Top with half rate + pub const @"top_240x400@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 400 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(413), + .back_porch_end = .init(2), + .front_porch_mid = .init(402), + .front_porch_end = .init(402), + .sync_start = .init(402), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 402, + .end = 406, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 402, + }, + }; + + /// Top with interlace enabled + pub const @"top_2x240x400@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 400 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(827), + .back_porch_end = .init(2), + .front_porch_mid = .init(802), + .front_porch_end = .init(802), + .sync_start = .init(802), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 802, + .end = 806, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 802, + }, + }; + + /// Top + pub const @"top_240x800@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 800 }, + .horizontal_timing = .{ + .total = 
.init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(827), + .back_porch_end = .init(2), + .front_porch_mid = .init(802), + .front_porch_end = .init(802), + .sync_start = .init(802), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 802, + .end = 806, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 802, + }, + }; + + /// Bottom + pub const @"bottom_240x320@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 320 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(205), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(413), + .back_porch_end = .init(82), + .front_porch_mid = .init(402), + .front_porch_end = .init(402), + .sync_start = .init(79), + .sync_end = .init(80), + .back_porch_start = .init(82), + .interrupt = .{ + .start = 403, + .end = 407, + }, + .unknown = 0x00, + }, + .vertical_display_timing = .{ + .back_porch_mid = 82, + .front_porch_start = 402, + }, + }; + + display_size: DisplaySize, + horizontal_timing: Timing, + horizontal_display_timing: Timing.Display, + vertical_timing: Timing, + vertical_display_timing: Timing.Display, + + /// Tries to compute timing parameters for the specified LCD configuration, returning `null` if no + /// configuration exists. 
+ /// + /// WARNING: THIS HAS ONLY BEEN TESTED ON A o2DS! + pub fn init(screen: Screen, half_rate: bool, x: u12, y: u12, width: u12, height: u12, refresh: f32) ?Preset { + std.debug.assert(x + width <= screen.width() and y + height <= screen.height()); + + // WARNING: changing this has (unsafe) implications, read above + const h_total = 450; + + const multiplier = if (half_rate) 1 else 2; + const remaining: f32 = @floatFromInt((zitrus.time.arm11_ticks_per_s / 24) * multiplier / (h_total + 1)); + const unbounded_v_total: u32 = @ceil(remaining / refresh) - 1; + + // We won't modify h_total so we're cooked. + if (unbounded_v_total > std.math.maxInt(u12) or unbounded_v_total < screen.height() + 12) return null; + + const v_total: u12 = @intCast(unbounded_v_total); + const v_unused_total = v_total - screen.height() - 12; + + // - First Border is Mid->End + // - Second Border is Start->Mid + const v_sync_end = v_unused_total + 2; + const v_back_porch_start = v_sync_end + 2; + const v_back_porch_mid = v_back_porch_start; + const v_back_porch_end = v_back_porch_mid + y; + + const v_front_porch_start = v_back_porch_end + height; + const v_front_porch_mid = v_back_porch_end + (screen.height() - y); + const v_front_porch_end = v_front_porch_mid; + const v_sync_start = switch (screen) { + // Yes... totally makes sense! 
+ .top => v_front_porch_end, + // When Sync Start -> End takes more than 1 tick, + // the screen directly desyncs/fades lmao (o2DS) + .bottom => v_sync_end - 1, + }; + + const v_irq_start = v_front_porch_end + 1; + const v_irq_end = v_irq_start + 4; + + return .{ + .display_size = .{ .width = @intCast(width), .height = @intCast(height) }, + // NOTE: as HTotal is hardcoded, we don't have to calculate things (unlike with V) + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209 + x), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(205), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 209 + x + width, + }, + .vertical_timing = .{ + .total = .init(v_total), + .back_porch_end = .init(v_back_porch_end), + .front_porch_mid = .init(v_front_porch_mid), + .front_porch_end = .init(v_front_porch_end), + .sync_start = .init(v_sync_start), + .sync_end = .init(v_sync_end), + .back_porch_start = .init(v_back_porch_start), + .interrupt = .{ + .start = v_irq_start, + .end = v_irq_end, + }, + .unknown = 0x00, + }, + .vertical_display_timing = .{ + .back_porch_mid = v_back_porch_mid, + .front_porch_start = v_front_porch_start, + }, + }; + } + }; + + /// 0x00 + horizontal_timing: Timing, + /// 0x24 + vertical_timing: Timing, + /// 0x48 + synchronization_polarity: SynchronizationPolarity, + /// 0x4C + border_color: Color, + /// 0x50 + horizontal_position: LsbRegister(u12), + /// 0x54 + vertical_position: LsbRegister(u12), + /// 0x58 + _unused0: u32, + /// 0x5C + display_size: DisplaySize, + /// 0x60 + horizontal_display_timing: Timing.Display, + /// 0x64 + vertical_display_timing: Timing.Display, + /// 0x68 + framebuffer: Framebuffer, + /// 0x9C + latching_point: LatchingPoint, + /// 0xA0 + _unused2: [24]u32, + + comptime { + 
std.debug.assert(@sizeOf(DisplayController) == 0x100); + } +}; + +// TODO: Properly finish this +pub const Registers = extern struct { + pub const VRamPower = packed struct(u32) { + _unknown0: u8 = std.math.maxInt(u8), + power_off_a_low: bool, + power_off_a_high: bool, + power_off_b_low: bool, + power_off_b_high: bool, + _unknown1: u20 = std.math.maxInt(u20), + }; + + pub const InterruptFlags = packed struct(u32) { + _unknown0: u1 = 0, + _unknown1: u1 = 0, + _unused0: u24 = 0, + psc0: bool, + psc1: bool, + pdc0: bool, + pdc1: bool, + ppf: bool, + p3d: bool, + }; + + pub const Busy = packed struct(u32) { + // NOTE: THESE CHANGE ON P3D. TESTTESTTEST + // WHEN THE GPU HANGS SOME BITS STAY ON (AND THEY'RE ALMOST ALWAYS THE SAME ONES!!!!!) + _unknown0: bool, + _unknown1: bool, + _unknown2: bool, + _unknown3: bool, + _unknown4: bool, + _unknown5: bool, + _unknown6: bool, + _unknown7: bool, + _unknown8: bool, + _unknown9: bool, + _unknown10: bool, + _unknown11: bool, + _unknown12: bool, + _unknown13: bool, + _unknown14: bool, + _unknown15: bool, + _unknown_vram_power_0: bool, + _unknown_vram_power_1: bool, + memory_fill_busy: bool, + memory_copy_busy: bool, + _unused2: u12, + }; + + pub const TrafficStatistics = extern struct { + non_vram_reads: u32, + non_vram_writes: u32, + vram_a_reads: u32, + vram_a_writes: u32, + vram_b_reads: u32, + vram_b_writes: u32, + input_assembly_reads: u32, + sampled_texture_reads: u32, + depth_buffer_reads: u32, + depth_buffer_writes: u32, + color_buffer_reads: u32, + color_buffer_writes: u32, + top_lcd_reads: u32, + bottom_lcd_reads: u32, + memory_copy_reads: u32, + memory_copy_writes: u32, + memory_fill_writes: [2]u32, + cpu_vram_reads: u32, + cpu_vram_writes: u32, + }; + + hardware_id: u32, + clock: u32, + _unknown0: u32, + _unused0: u32, + psc: [2]MemoryFill, + vram_power: VRamPower, + irq: InterruptFlags, + _something: u32, + _make_something: u32, + _backlight_or_so_0: u32, + /// 0x044 + _unknown1: u32, + /// 0x048 + _unknown2: u32, + 
/// 0x04C + _unused1: u32, + /// 0x050 + timing_control: [2]u32, + /// 0x058 + busy: Busy, + /// 0x05C + _unknown3: u32, + /// 0x060 + _unknown4: u32, + /// 0x064 + _unknown5: u32, + /// 0x068 + _unknown6: u32, + _unused2: u32, + traffic_statistics: TrafficStatistics, + _backlight_or_so_1: u32, + vram_a_base_address: [*]u8, + vram_b_base_address: [*]u8, + _backlight_or_so_2: u32, + _unknown7: u32, + _unused3: [0x2C]u8, + _unused4: [0x300]u8, + pdc: [2]DisplayController, + _unused5: [0x600]u8 = @splat(0), + ppf: PictureFormatter, + _unknown8: [0xF5]u32 = @splat(0), + p3d: Graphics, + + comptime { + if (builtin.cpu.arch.isArm()) { + if (@offsetOf(Registers, "timing_control") != 0x50) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "timing_control")})); + if (@offsetOf(Registers, "traffic_statistics") != 0x70) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "traffic_statistics")})); + if (@offsetOf(Registers, "pdc") != 0x400) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "pdc")})); + if (@offsetOf(Registers, "ppf") != 0xC00) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "ppf")})); + if (@offsetOf(Registers, "p3d") != 0x1000) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "p3d")})); + } + } +}; + +// Size guards for the register blocks embedded in `Registers` (`psc` is `[2]MemoryFill`, `ppf` is `PictureFormatter`). +// Each message names the type that is actually being checked. +comptime { + if (@sizeOf(MemoryFill) != 0x10) + @compileError(std.fmt.comptimePrint("(@sizeOf(MemoryFill) == 0x{X}) and 0x{X} != 0x10!", .{ @sizeOf(MemoryFill), @sizeOf(MemoryFill) })); + + if (@sizeOf(PictureFormatter) != 0x2C) + @compileError(std.fmt.comptimePrint("(@sizeOf(PictureFormatter) == 0x{X}) and 0x{X} != 0x2C!", .{ @sizeOf(PictureFormatter), @sizeOf(PictureFormatter) })); + + _ = morton; + _ = shader; +} + +const testing = std.testing; + +const builtin = @import("builtin"); + +const std = @import("std"); +const zsflt = @import("zsflt"); +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const Trigger = 
hardware.Trigger; +const LsbRegister = hardware.LsbRegister; +const MsbRegister = hardware.MsbRegister; +const BitpackedArray = hardware.BitpackedArray; +const AlignedPhysicalAddress = hardware.AlignedPhysicalAddress; +const PhysicalAddress = hardware.PhysicalAddress; + +const OperandDescriptor = shader.encoding.OperandDescriptor; +const Instruction = shader.encoding.Instruction; +const FloatConstantRegister = shader.register.Source.Constant; +const InputRegister = shader.register.Source.Input; diff --git a/src/platform/3ds/mango/hardware/pica/command.zig b/src/platform/3ds/mango/hardware/pica/command.zig new file mode 100644 index 0000000..f52fcfa --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/command.zig @@ -0,0 +1,544 @@ +//! Type-safe PICA200 `pica.Graphics` command wrappers and types. +//! +//! Address and Size of command queues/buffers/lists must be aligned to 16 bytes +//! Commands are aligned to 8 bytes + +pub const Header = packed struct(u32) { + pub const Mode = enum(u1) { consecutive, incremental }; + + id: Id, + mask: u4, + extra: u8, + _unused0: u3 = 0, + mode: Mode, +}; + +pub const Id = enum(u16) { + _, + + pub fn fromRegister(comptime base: *volatile pica.Graphics, register: *volatile anyopaque) Id { + std.debug.assert(@intFromPtr(register) >= @intFromPtr(base) and @intFromPtr(register) < (@intFromPtr(base) + @sizeOf(pica.Graphics))); // invalid internal register, pointer is not within the valid range + + const offset = @intFromPtr(register) - @intFromPtr(base); + + std.debug.assert((offset % @alignOf(u32)) == 0); // invalid internal register, it must be aligned to 4 bytes + + return @enumFromInt(@divExact(offset, @alignOf(u32))); + } +}; + +/// WARNING: using this will bloat your binary! 
+pub const Dump = struct { + pub const Iterator = struct { + words: []const u32, + current: u32, + + pub fn init(words: []const u32) Iterator { + return .{ .words = words, .current = 0 }; + } + + pub fn next(it: *Iterator) ?Dump { + if (it.current + 1 >= it.words.len) return null; + const hdr: Header = @bitCast(it.words[it.current + 1]); + const full_len = 2 + @as(u32, hdr.extra); + defer it.current += std.mem.alignForward(u32, full_len, 2); + + return .{ .words = it.words[it.current..][0..@min(full_len, it.words.len - it.current)] }; + } + }; + + pub const Single = struct { + // XXX: This is quite bad, a rewrite would be good + pub const Info = struct { + /// Fully qualified name + name: []const u8, + type: type, + + pub fn findName(id: Id) []const u8 { + return switch (@intFromEnum(id)) { + (@sizeOf(pica.Graphics) / @sizeOf(u32))...0xFFFF => "", + inline else => |word_offset| find(word_offset * @sizeOf(u32)).name, + }; + } + + pub fn find(comptime offset: u32) Info { + @setEvalBranchQuota(200000); + + var current = Search.find(pica.Graphics, offset); + var current_offset = offset - @offsetOf(pica.Graphics, current.name); + var fully_qualified_name = current.name; + while (@sizeOf(current.type) > @sizeOf(u32)) switch (@typeInfo(current.type)) { + .@"struct" => |st| switch (st.layout) { + .auto => unreachable, + .@"packed" => unreachable, // Hitting this means you have an invalid packed struct in there. + .@"extern" => { + const next = Search.find(current.type, current_offset); + + current_offset -= @offsetOf(current.type, next.name); + current = next; + + fully_qualified_name = fully_qualified_name ++ "." 
++ next.name; + }, + }, + .array => |array| switch (std.math.order(@sizeOf(array.child), @sizeOf(u32))) { + .lt => current.type = [@divExact(@sizeOf(u32), @sizeOf(array.child))]array.child, + .eq, .gt => { + fully_qualified_name = fully_qualified_name ++ std.fmt.comptimePrint("[{d}]", .{current_offset / @sizeOf(array.child)}); + current_offset %= @sizeOf(array.child); + current.type = array.child; + }, + }, + else => @compileError("TODO"), + }; + + return .{ .name = fully_qualified_name, .type = current.type }; + } + + const Search = struct { + parent: type, + offset: u32, + + pub fn find(comptime T: type, comptime offset: u32) Info { + const fields = @typeInfo(T).@"struct".fields; + const ctx: Search = .{ .parent = T, .offset = offset }; + const index = std.sort.binarySearch(std.builtin.Type.StructField, fields, ctx, Search.compare) orelse unreachable; + return .{ .name = fields[index].name, .type = fields[index].type }; + } + + pub fn compare(ctx: Search, field: std.builtin.Type.StructField) std.math.Order { + const field_offset = @offsetOf(ctx.parent, field.name); + if (ctx.offset < field_offset) return .lt; + if (ctx.offset >= field_offset + @sizeOf(field.type)) return .gt; + return .eq; + } + }; + }; + + mode: Header.Mode, + id: Id, + raw: u32, + + pub fn format(single: Single, w: *std.Io.Writer) std.Io.Writer.Error!void { + switch (@intFromEnum(single.id)) { + (@sizeOf(pica.Graphics) / @sizeOf(u32))...0xFFFF => try w.print("{X:0>8}", .{single.raw}), + inline else => |word_offset| { + const info: Info = .find(word_offset * @sizeOf(u32)); + + switch (single.mode) { + .incremental => { + try w.print("{s} ({X:0>3}) -> ", .{ info.name, single.id }); + try printValue(info.type, single.raw, w); + }, + .consecutive => try printValue(info.type, single.raw, w), + } + }, + } + } + + pub fn printValue(comptime T: type, raw: u32, w: *std.Io.Writer) std.Io.Writer.Error!void { + const typed: T = switch (@typeInfo(T)) { + .@"enum" => @enumFromInt(raw), + else => 
@bitCast(raw),
+            };
+
+            try w.print(if (std.meta.hasFn(T, "format")) "{f}" else if (T == u32) "{X:0>8}" else "{any}", .{typed});
+        }
+    };
+
+    words: []const u32,
+
+    pub fn format(dump: Dump, w: *std.Io.Writer) std.Io.Writer.Error!void {
+        const hdr: Header = @bitCast(dump.words[1]);
+
+        switch (hdr.mode) {
+            .incremental => try w.print("{t} ({b:0>4})", .{ hdr.mode, hdr.mask }),
+            .consecutive => try w.print("{t}: {s} ({X:0>3}, {b:0>4})", .{ hdr.mode, Single.Info.findName(hdr.id), hdr.id, hdr.mask }),
+        }
+
+        if (hdr.extra > 0) try w.writeByte('\n');
+
+        var single: Single = .{
+            .mode = hdr.mode,
+            .id = hdr.id,
+            .raw = dump.words[0],
+        };
+
+        var i: u32 = 0;
+        while (true) {
+            try w.print("... {f}", .{single});
+
+            if (i >= hdr.extra) break;
+            try w.writeByte('\n');
+            single = .{
+                .mode = hdr.mode,
+                .raw = dump.words[2 + i],
+                .id = switch (hdr.mode) {
+                    .consecutive => single.id,
+                    .incremental => @enumFromInt(@intFromEnum(single.id) + 1),
+                },
+            };
+            i += 1;
+        }
+    }
+};
+
+/// A command queue over a caller-provided buffer of 32-bit words.
+///
+/// `end` is the number of words written so far. The queue never allocates
+/// and never grows `buffer`.
+pub const Queue = struct {
+    /// A queue with no backing storage.
+    // A slice type has no declarations, so the decl literal `.empty` cannot
+    // resolve here; use an empty array pointer, as `Custom.empty` does.
+    pub const empty: Queue = .{ .buffer = &.{}, .end = 0 };
+
+    buffer: []align(16) u32,
+    end: u32,
+
+    /// Wraps `buffer` as an empty queue.
+    pub fn initBuffer(buffer: []align(16) u32) Queue {
+        return .{
+            .buffer = buffer,
+            .end = 0,
+        };
+    }
+
+    /// Returns the words written so far.
+    pub fn slice(queue: Queue) []align(16) u32 {
+        return queue.buffer[0..queue.end];
+    }
+
+    /// Returns the part of `buffer` that has not been written yet.
+    pub fn unusedCapacitySlice(queue: Queue) []align(8) u32 {
+        return @alignCast(queue.buffer[queue.end..]);
+    }
+
+    /// Discards all written words; `buffer` is kept.
+    pub fn reset(queue: *Queue) void {
+        queue.end = 0;
+    }
+
+    /// Same as `addMasked` with all four mask bits set.
+    pub inline fn add(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, value: std.meta.Child(@TypeOf(register))) void {
+        return queue.addMasked(base, register, value, 0xF);
+    }
+
+    pub fn addMasked(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, value: std.meta.Child(@TypeOf(register)), mask: u4) void {
+        comptime std.debug.assert(@typeInfo(@TypeOf(register)) == .pointer);
+
+        const Child = std.meta.Child(@TypeOf(register));
+        const child_info =
@typeInfo(Child); + + const id: Id = .fromRegister(base, register); + + switch (comptime std.math.order(@bitSizeOf(Child), @bitSizeOf(u32))) { + .eq => queue.addMaskedBuffer(id, &.{switch (child_info) { + .@"enum" => @intFromEnum(value), + else => @bitCast(value), + }}, mask, .consecutive), + .gt => { + const as_u32_array = switch (child_info) { + .array => |a| if (@bitSizeOf(a.child) != @bitSizeOf(u32)) + @compileError("only arrays of 32-bit types are supported for incremental writes") + else + @as([a.len]u32, @bitCast(value)), + .@"struct" => |s| if (s.layout == .auto or (@bitSizeOf(Child) % @bitSizeOf(u32)) != 0) + @compileError("only non-auto structs with a bitSize multiple of 32 are supported") + else + @as([@divExact(@bitSizeOf(Child), @bitSizeOf(u32))]u32, @bitCast(value)), + else => @compileError("unsupported type for incremental write"), + }; + + queue.addMaskedBuffer(id, &as_u32_array, mask, .incremental); + }, + .lt => @compileError("commands only support writing full 32-bit values (which you can mask!)"), + } + } + + fn IncrementalWritesTuple(comptime base: *volatile pica.Graphics, comptime registers: anytype) type { + const RegistersType = @TypeOf(registers); + + comptime std.debug.assert(@typeInfo(RegistersType) == .@"struct"); + const st_ty = @typeInfo(RegistersType).@"struct"; + + comptime std.debug.assert(st_ty.is_tuple); + + var needed_field_types: [st_ty.fields.len]type = undefined; + + @setEvalBranchQuota(st_ty.fields.len * 2000); + for (st_ty.fields, 0..) 
|field, i| {
+            std.debug.assert(@typeInfo(field.type) == .pointer);
+
+            const f_ty = @typeInfo(field.type).pointer;
+            const current = registers[i];
+            const current_id: Id = .fromRegister(base, current);
+
+            // A register that is not exactly one word wide is a hard error.
+            // `@compileError` stops here with this message; `@compileLog` only
+            // prints and lets analysis continue.
+            if (@bitSizeOf(f_ty.child) != @bitSizeOf(u32)) @compileError("only values with a @bitSizeOf(u32) are supported.");
+
+            if (i > 0) {
+                const last_id: Id = .fromRegister(base, registers[i - 1]);
+
+                // Each register must directly follow the previous one.
+                comptime std.debug.assert(std.math.order(@intFromEnum(current_id), @intFromEnum(last_id)) == .gt);
+                comptime std.debug.assert((@intFromEnum(current_id) - @intFromEnum(last_id)) == 1);
+            }
+
+            needed_field_types[i] = f_ty.child;
+        }
+
+        return @Tuple(&needed_field_types);
+    }
+
+    /// Same as `addIncrementalMasked` with all four mask bits set.
+    pub inline fn addIncremental(queue: *Queue, comptime base: *volatile pica.Graphics, comptime registers: anytype, values: IncrementalWritesTuple(base, registers)) void {
+        return queue.addIncrementalMasked(base, registers, values, 0b1111);
+    }
+
+    /// Queues one incremental write of `values`, starting at `registers[0]`.
+    ///
+    /// `registers` is a comptime tuple of register pointers and `values` holds
+    /// one value per register. Does nothing when `registers` is empty.
+    pub fn addIncrementalMasked(queue: *Queue, comptime base: *volatile pica.Graphics, comptime registers: anytype, values: IncrementalWritesTuple(base, registers), mask: u4) void {
+        if (registers.len == 0) return;
+
+        comptime std.debug.assert(values.len <= 256);
+        const first_id: Id = .fromRegister(base, registers[0]);
+
+        var u32_values: [values.len]u32 = undefined;
+        inline for (&u32_values, 0..)
|*v, i| v.* = switch (@typeInfo(@TypeOf(values[i]))) {
+            .@"enum" => @intFromEnum(values[i]),
+            else => @bitCast(values[i]),
+        };
+
+        return queue.addMaskedBuffer(first_id, &u32_values, mask, .incremental);
+    }
+
+    /// Same as `addConsecutiveMasked` with all four mask bits set.
+    pub inline fn addConsecutive(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, values: []const std.meta.Child(@TypeOf(register))) void {
+        return queue.addConsecutiveMasked(base, register, values, 0b1111);
+    }
+
+    /// Queues a consecutive write of every element of `values` to `register`.
+    ///
+    /// The register's child type must be exactly 32 bits wide.
+    pub fn addConsecutiveMasked(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, values: []const std.meta.Child(@TypeOf(register)), mask: u4) void {
+        comptime std.debug.assert(@typeInfo(@TypeOf(register)) == .pointer);
+
+        const Child = std.meta.Child(@TypeOf(register));
+        const id: Id = .fromRegister(base, register);
+
+        comptime std.debug.assert(@bitSizeOf(Child) == @bitSizeOf(u32));
+
+        return queue.addMaskedBuffer(id, @ptrCast(values), mask, .consecutive);
+    }
+
+    /// Appends raw command words that write `values` starting at register `id`.
+    ///
+    /// The values are split into commands of at most 256 words. Each command is
+    /// the first value, a `Header`, then the remaining values, padded so the
+    /// next command starts on an 8-byte boundary. In `.incremental` mode each
+    /// later command targets the register after the last one the previous
+    /// command wrote. In `.consecutive` mode every command targets `id`.
+    ///
+    /// Does nothing when `values` is empty. The buffer is not grown or checked
+    /// for space here.
+    pub fn addMaskedBuffer(queue: *Queue, id: Id, values: []const u32, mask: u4, mode: Header.Mode) void {
+        if (values.len == 0) return;
+
+        var current_id: Id = id;
+        var current: usize = 0;
+        var remaining: usize = values.len;
+
+        while (remaining > 0) {
+            const len = @min(remaining, 256);
+            defer {
+                current += len;
+                remaining -= len;
+            }
+
+            const remaining_slice = values[current..][0..len];
+
+            queue.buffer[queue.end] = remaining_slice[0];
+            queue.buffer[queue.end + 1] = @bitCast(Header{
+                // Must be the running id: with the original `id`, every chunk
+                // after the first would rewrite the first 256 registers.
+                .id = current_id,
+                .mask = mask,
+                .extra = @intCast(len - 1),
+                .mode = mode,
+            });
+            queue.end += 2;
+
+            @memcpy(queue.buffer[queue.end..][0..(len - 1)], remaining_slice[1..len]);
+            queue.end += std.mem.alignForward(usize, len - 1, 2); // commands must be aligned to 8 bytes
+            if (mode == .incremental) current_id = @enumFromInt(@intFromEnum(current_id) + len);
+        }
+    }
+
+    pub fn chain(queue: *Queue, address: zitrus.hardware.AlignedPhysicalAddress(.@"16", .@"8")) *zitrus.hardware.LsbRegister(u22) {
+        const p3d = &zitrus.memory.arm11.pica.p3d;
+
+        const
size = &queue.buffer[queue.end]; + queue.add(p3d, &p3d.primitive_engine.command_buffer.size[0], .init(0)); + queue.add(p3d, &p3d.primitive_engine.command_buffer.address[0], address); + queue.add(p3d, &p3d.primitive_engine.command_buffer.jump[0], .init(.trigger)); + + if (!std.mem.isAligned(queue.end, 4)) { + queue.add(p3d, &p3d.primitive_engine.command_buffer.jump[0], .init(.trigger)); + } + + return @ptrCast(size); + } + + pub fn finalize(queue: *Queue) void { + const p3d = &zitrus.memory.arm11.pica.p3d; + + queue.add(p3d, &p3d.irq.req[0..4].*, @bitCast(@as(u32, 0x12345678))); + + if (!std.mem.isAligned(queue.end, 4)) { + queue.add(p3d, &p3d.irq.req[0..4].*, @bitCast(@as(u32, 0x12345678))); + } + } +}; + +/// Represents a growable command stream (multiple chained command queues) +pub const stream = struct { + pub const StreamResetMode = enum { free_all, retain_largest }; + pub const Segment = struct { + queue: Queue, + node: std.SinglyLinkedList.Node, + + comptime { + std.debug.assert(@sizeOf(Segment) == 16); + } + + pub fn data(segment: *Segment) []align(16) u32 { + return @as([*]align(16) u32, @ptrCast(@alignCast(segment)))[0 .. @divExact(@sizeOf(Segment), @sizeOf(u32)) + segment.queue.buffer.len]; + } + }; + + /// Context must have a field called `use_jumps` which toggles whether the stream + /// is a single command queue or multiple chained ones (when growing it) + /// + /// If `use_jumps` is not comptime-known or is `true`, it must also implement + /// `fn virtualToPhysical(ctx, virtual: *align(4096) const anyopaque) zitrus.hardware.PhysicalAddress`. 
+ pub fn Custom(comptime Context: type) type { + return struct { + pub const empty: Stream = .{ .list = .{}, .last_chain_size = null, .initial_chunk = &.{}, .start = 0 }; + + list: std.SinglyLinkedList, + last_chain_size: ?*zitrus.hardware.LsbRegister(u22), + initial_chunk: []align(16) const u32, + /// This is intended to be modified directly, must be aligned to 4 words (16 bytes) + /// + /// Changes when finalizing or chaining queues (e.g when growing) + start: u32, + + pub fn deinit(strm: *Stream, gpa: std.mem.Allocator) void { + strm.reset(gpa, .free_all); + strm.* = undefined; + } + + pub fn first(strm: *Stream) ?*Queue { + const head = strm.list.first orelse return null; + const segment: *Segment = @fieldParentPtr("node", head); + return &segment.queue; + } + + /// Grows the stream exponentially, i.e 4096->8192->16384; starting from `min_len` + pub fn grow( + strm: *Stream, + gpa: std.mem.Allocator, + /// Length of the first queue *in `u32`*s + min_len: u32, + ctx: Context, + ) !void { + std.debug.assert(min_len >= @sizeOf(Segment)); // You're crazy, please bump the len A LOT. + std.debug.assert(std.mem.isAligned(strm.start, 4)); + + const segment = if (strm.list.first) |node| blk: { + const first_segment: *Segment = @alignCast(@fieldParentPtr("node", node)); + const first_que: *Queue = &first_segment.queue; + const first_data = first_segment.data(); + const next_len = first_data.len << 1; + + if (!ctx.use_jumps) { + std.debug.assert(first_segment.node.next == null); + + const new_len = first_data.len + next_len; + const new = if (gpa.remap(first_data, new_len)) |remapped| remapped else remapped: { + const new = try gpa.alignedAlloc(u32, .@"16", new_len); + defer gpa.free(first_data); + + const copying = first_data[0 .. 
@divExact(@sizeOf(Segment), @sizeOf(u32)) + first_segment.queue.end]; + @memcpy(new[0..copying.len], copying); + break :remapped new; + }; + + const new_segment: *Segment = @ptrCast(new); + // NOTE: we copied all commands above + new_segment.queue.buffer = new[@divExact(@sizeOf(Segment), @sizeOf(u32))..]; + strm.list.first = &new_segment.node; + return; + } + + const new_segment = try allocSegment(gpa, next_len); + const had_last_chain = strm.last_chain_size != null; + + if (strm.last_chain_size) |last_size| { + const len = (first_que.end - strm.start); + last_size.* = .init(@intCast((len * @sizeOf(u32)) >> 3)); + } + + strm.last_chain_size = first_que.chain(.fromPhysical(ctx.virtualToPhysical(new_segment.queue.buffer.ptr))); + + if (!had_last_chain) { + strm.initial_chunk = @alignCast(first_que.buffer[strm.start..first_que.end]); + } + + strm.start = 0; + break :blk new_segment; + } else try allocSegment(gpa, min_len); + + strm.list.prepend(&segment.node); + } + + /// Finalizes and returns the initial chunk of the stream or null if none. 
+            pub fn finalize(strm: *Stream) ?[]align(16) const u32 {
+                std.debug.assert(std.mem.isAligned(strm.start, 4));
+
+                const que = strm.first() orelse return null;
+
+                // Nothing to finalize
+                if (strm.start == que.end and strm.last_chain_size == null) return null;
+                que.finalize();
+
+                const initial_chunk: []align(16) const u32 = if (strm.last_chain_size) |last_size| blk: {
+                    last_size.* = .init(@intCast(((que.end - strm.start) * @sizeOf(u32)) >> 3));
+                    break :blk strm.initial_chunk;
+                } else @alignCast(que.buffer[strm.start..que.end]);
+
+                strm.last_chain_size = null;
+                strm.start = que.end;
+                return initial_chunk;
+            }
+
+            /// Discards all recorded commands.
+            ///
+            /// `.free_all` frees every segment. `.retain_largest` keeps the head
+            /// segment (the most recently grown one, since `grow` prepends),
+            /// empties it, and frees the rest. Does nothing on an empty stream.
+            pub fn reset(strm: *Stream, gpa: std.mem.Allocator, mode: StreamResetMode) void {
+                const first_node = strm.list.first orelse return;
+                strm.last_chain_size = null;
+                strm.initial_chunk = &.{};
+                strm.start = 0;
+
+                var freeing = switch (mode) {
+                    .free_all => blk: {
+                        strm.list.first = null;
+                        break :blk first_node;
+                    },
+                    .retain_largest => blk: {
+                        // Save the tail before cutting the link. Reading
+                        // `first_node.next` after nulling it yields null, so
+                        // nothing would be freed and the old segments leak.
+                        const rest = first_node.next;
+                        first_node.next = null;
+
+                        const first_segment: *Segment = @fieldParentPtr("node", first_node);
+                        first_segment.queue.end = 0;
+                        break :blk rest;
+                    },
+                };
+
+                while (freeing) |node| {
+                    // Read the link before the node's memory is released.
+                    freeing = node.next;
+
+                    const segment: *Segment = @alignCast(@fieldParentPtr("node", node));
+                    const segment_data = segment.data();
+                    gpa.free(segment_data);
+                }
+            }
+
+            /// Allocates `len` words aligned to 16 bytes. The `Segment` header
+            /// sits at the start and the queue buffer is the remainder.
+            fn allocSegment(gpa: std.mem.Allocator, len: u32) !*Segment {
+                const data = try gpa.alignedAlloc(u32, .@"16", len);
+                const segment: *Segment = @ptrCast(data);
+
+                segment.* = .{
+                    .queue = .{
+                        .buffer = data[@divExact(@sizeOf(Segment), @sizeOf(u32))..],
+                        .end = 0,
+                    },
+                    .node = .{},
+                };
+
+                return segment;
+            }
+
+            const Stream = @This();
+        };
+    }
+};
+
+const std = @import("std");
+
+const zitrus = @import("zitrus");
+const pica = zitrus.hardware.pica;
diff --git a/src/platform/3ds/mango/hardware/pica/shader.zig b/src/platform/3ds/mango/hardware/pica/shader.zig
new file mode 100644
index 0000000..beafe10
--- /dev/null
+++
b/src/platform/3ds/mango/hardware/pica/shader.zig @@ -0,0 +1,70 @@ +//! PICA200 shader ISA encoder, assembler and (TODO) disassembler. +//! +//! * `as` - zitrus PICA200 shader assembler / disassembler. +//! * `register` - register enums for everything shader related +//! * `encoding` - single instruction encoding +//! * `spirv` - ?? :) (TODO) +//! +//! * `Encoder` - Type-safe PICA200 shader ISA encoder + +pub const Type = enum(u1) { + vertex, + geometry, +}; + +pub const Geometry = union(Kind) { + pub const Kind = enum { + point, + variable, + fixed, + }; + + pub const Point = struct { + inputs: u5, + }; + + pub const Variable = struct { + full_vertices: u5, + }; + + pub const Fixed = struct { + vertices: u5, + uniform_start: register.Source.Constant, + }; + + point: Point, + variable: Variable, + fixed: Fixed, + + pub fn initPoint(inputs: u5) Geometry { + return .{ .point = .{ .inputs = inputs } }; + } + + pub fn initVariable(full_vertices: u5) Geometry { + return .{ .variable = .{ .full_vertices = full_vertices } }; + } + + pub fn initFixed(vertices: u5, uniform_start: register.Source.Constant) Geometry { + return .{ .fixed = .{ .vertices = vertices, .uniform_start = uniform_start } }; + } +}; + +pub const as = @import("shader/as.zig"); +pub const Encoder = @import("shader/Encoder.zig"); + +pub const register = @import("shader/register.zig"); +pub const encoding = @import("shader/encoding.zig"); + +pub const spirv = @import("shader/spirv.zig"); + +comptime { + _ = as; + _ = Encoder; + + _ = register; + _ = encoding; + + _ = spirv; +} + +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig b/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig new file mode 100644 index 0000000..8a0c35f --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig @@ -0,0 +1,428 @@ +//! 
Type-safe PICA200 shader ISA encoder + +pub const OperandDescriptorAllocationError = error{OutOfDescriptors}; +pub const InstructionEncodingError = error{InvalidSourceRegisterCombination}; + +const max_descriptors = std.math.maxInt(u7); + +instructions: std.ArrayList(Instruction), +descriptors: [max_descriptors]OperandDescriptor, +masks: [max_descriptors]OperandDescriptor.Mask, +allocated_descriptors: u8, + +pub const init: Encoder = .{ + .instructions = .empty, + .descriptors = undefined, + .masks = undefined, + .allocated_descriptors = 0, +}; + +pub fn move(encoder: *Encoder) Encoder { + defer encoder.* = .init; + return encoder.*; +} + +pub fn deinit(encoder: *Encoder, allocator: Allocator) void { + encoder.instructions.deinit(allocator); + encoder.* = undefined; +} + +pub fn constDescriptorSlice(encoder: *const Encoder) []const OperandDescriptor { + return encoder.descriptors[0..encoder.allocated_descriptors]; +} + +pub fn descriptorSlice(encoder: *Encoder) []OperandDescriptor { + return encoder.descriptors[0..encoder.allocated_descriptors]; +} + +pub fn getOrAllocateOperandDescriptor(encoder: *Encoder, comptime T: type, comptime descriptor_mask: OperandDescriptor.Mask, operand_descriptor: OperandDescriptor) OperandDescriptorAllocationError!T { + std.debug.assert(T == u5 or T == u7); + + for (encoder.descriptors[0..encoder.allocated_descriptors], encoder.masks[0..encoder.allocated_descriptors], 0..) 
|*descriptor, *mask, i| {
+        // Descriptors past this index cannot be encoded in `T` (`u5` is used
+        // by `mad`). Stop here so `@intCast(i)` below cannot trap; the caller
+        // gets `error.OutOfDescriptors` from the checks after the loop.
+        if (i > std.math.maxInt(T)) break;
+
+        if (mask.*.contains(descriptor_mask) and operand_descriptor.equalsMasked(descriptor_mask, descriptor.*)) {
+            // Reuse the descriptor
+            return @intCast(i);
+        }
+
+        if (descriptor_mask.contains(mask.*) and operand_descriptor.equalsMasked(mask.*, descriptor.*)) {
+            // Reuse and expand the descriptor
+            descriptor.* = operand_descriptor;
+            mask.* = descriptor_mask;
+            return @intCast(i);
+        }
+    }
+
+    if (encoder.descriptors.len == encoder.allocated_descriptors) {
+        return error.OutOfDescriptors;
+    }
+
+    if (encoder.allocated_descriptors < std.math.maxInt(T)) {
+        encoder.descriptors[encoder.allocated_descriptors] = operand_descriptor;
+        encoder.masks[encoder.allocated_descriptors] = descriptor_mask;
+        encoder.allocated_descriptors += 1;
+        return @intCast(encoder.allocated_descriptors - 1);
+    }
+
+    // TODO:
+    // Swap a non-reduced descriptor or return error
+    return error.OutOfDescriptors;
+}
+
+/// Appends `instruction` to the instruction list.
+pub fn addInstruction(encoder: *Encoder, allocator: Allocator, instruction: Instruction) !void {
+    try encoder.instructions.append(allocator, instruction);
+}
+
+/// Appends an instruction that carries only an opcode.
+pub fn unparametized(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode) !void {
+    return encoder.addInstruction(alloc, .{ .unparametized = .{ .opcode = opcode } });
+}
+
+/// Appends a one-source register instruction, reusing or allocating an operand
+/// descriptor for the destination mask, negation and selector. `src2` is
+/// always encoded as `.v0`.
+pub fn unary(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void {
+    const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .unary, .{
+        .dst_mask = dst_mask,
+        .src1_neg = src1_neg,
+        .src1_selector = src1_selector,
+    });
+
+    return encoder.addInstruction(alloc, .{ .register = .{
+        .operand_descriptor_id = descriptor_id,
+        .src1 = src1,
+        .src2 = .v0,
+        .address_component = src_rel,
+        .dst = dest,
+        .opcode = opcode,
+    } });
+}
+
+pub fn binary(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, dest: DestinationRegister, dst_mask:
Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + if (!src1.isLimited() and !src2.isLimited()) { + return error.InvalidSourceRegisterCombination; + } + + if (src1.isLimited() != src2.isLimited() and !src2.isLimited()) { + if (!opcode.isCommutative()) { + if (opcode.invert()) |opcode_i| { + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + }); + + return encoder.addInstruction(alloc, .{ .register_inverted = .{ .operand_descriptor_id = descriptor_id, .src2 = src2, .src1 = src1.toLimited().?, .address_component = src_rel, .dst = dest, .opcode = opcode_i } }); + } + + return error.InvalidSourceRegisterCombination; + } + + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src2_neg, + .src1_selector = src2_selector, + .src2_neg = src1_neg, + .src2_selector = src1_selector, + }); + + return encoder.addInstruction(alloc, .{ .register = .{ .operand_descriptor_id = descriptor_id, .src2 = src1.toLimited().?, .src1 = src2, .address_component = src_rel, .dst = dest, .opcode = opcode } }); + } + + // TODO: If commutative we could search and reuse an operand descriptor with swapped src1 <=> src2 + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + }); + + return encoder.addInstruction(alloc, .{ .register = .{ .operand_descriptor_id = descriptor_id, .src2 = src2.toLimited().?, .src1 = src1, .address_component = src_rel, .dst = dest, .opcode = opcode } }); +} + +pub fn flow(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, num: u8, 
dest: u12, condition: Condition, x: bool, y: bool) !void { + return encoder.addInstruction(alloc, .{ .control_flow = .{ + .num = num, + .dst = dest, + .condition = condition, + .ref_x = x, + .ref_y = y, + .opcode = opcode, + } }); +} + +pub fn flowConstant(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, num: u8, dest: u12, constant: IntegralRegister) !void { + return encoder.addInstruction(alloc, .{ .constant_control_flow = .{ + .num = num, + .dst = dest, + .src = constant, + .opcode = opcode, + } }); +} + +pub fn add(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .add, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dp3(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dp3, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dp4(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dp4, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dph(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, 
.dph, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dst(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dst, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn ex2(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .ex2, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn lg2(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .lg2, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn litp(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .litp, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mul(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .mul, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn sge(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: 
RelativeComponent) !void { + return encoder.binary(alloc, .sge, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn slt(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .slt, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn flr(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .flr, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn max(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .max, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn min(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .min, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn rcp(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .rcp, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn rsq(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, 
dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .rsq, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mova(encoder: *Encoder, alloc: Allocator, a_mask: register.AddressComponent.Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .mova, .o0, .{ .enable_x = a_mask.enable_x, .enable_y = a_mask.enable_y }, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mov(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .mov, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +// dphi handled by dph +// dsti handled by dst +// sgei handled by sge +// slti handled by slt + +pub fn @"break"(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .@"break"); +} + +pub fn nop(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .nop); +} + +pub fn end(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .end); +} + +pub fn breakc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool) !void { + return encoder.flow(alloc, .breakc, 0, 0, condition, x, y); +} + +pub fn call(encoder: *Encoder, alloc: Allocator, dest: u12, num: u8) !void { + return encoder.flow(alloc, .call, num, dest, .@"and", false, false); +} + +pub fn callc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12, num: u8) !void { + return encoder.flow(alloc, .callc, num, dest, condition, x, y); +} + +pub fn callu(encoder: *Encoder, alloc: Allocator, b: BooleanRegister, dest: u12, num: u8) !void { + return encoder.flowConstant(alloc, .callu, num, dest, .{ .bool = b }); +} + +pub fn ifu(encoder: 
*Encoder, alloc: Allocator, b: BooleanRegister, dest: u12, num: u8) !void {
    return encoder.flowConstant(alloc, .ifu, num, dest, .{ .bool = b });
}

/// Appends an `ifc` flow instruction.
///
/// `condition`, `x` and `y` are forwarded unchanged to `flow`, together with
/// the destination `dest` and the count `num`.
pub fn ifc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12, num: u8) !void {
    return encoder.flow(alloc, .ifc, num, dest, condition, x, y);
}

/// Appends a `loop` instruction driven by the integer register `i`.
///
/// The `num` argument of `flowConstant` is always 0 for this opcode.
pub fn loop(encoder: *Encoder, alloc: Allocator, i: IntegerRegister, dest: u12) !void {
    return encoder.flowConstant(alloc, .loop, 0, dest, .{ .int = .{ .used = i } });
}

/// Appends a `setemit` instruction carrying `vertex_id`, `primitive` and `winding`.
pub fn setemit(encoder: *Encoder, alloc: Allocator, vertex_id: u2, primitive: Primitive, winding: Winding) !void {
    return encoder.addInstruction(alloc, .{ .set_emit = .{
        .winding = winding,
        .primitive_emit = primitive,
        .vertex_id = vertex_id,
        .opcode = .setemit,
    } });
}

/// Appends an `emit` instruction, which takes no operands.
pub fn emit(encoder: *Encoder, alloc: Allocator) !void {
    return encoder.unparametized(alloc, .emit);
}

/// Appends a conditional jump (`jmpc`) to `dest`.
///
/// The `num` argument of `flow` is always 0 for this opcode.
pub fn jmpc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12) !void {
    return encoder.flow(alloc, .jmpc, 0, dest, condition, x, y);
}

/// Appends a jump on the boolean register `b` (`jmpu`) to `dest`.
///
/// The `num` argument of `flowConstant` carries the negated `if_true` flag:
/// 0 when `if_true` is set, 1 otherwise.
pub fn jmpu(encoder: *Encoder, alloc: Allocator, b: BooleanRegister, if_true: bool, dest: u12) !void {
    const jump_if_false = !if_true;
    return encoder.flowConstant(alloc, .jmpu, @intFromBool(jump_if_false), dest, .{ .bool = b });
}

/// Appends a `cmp` instruction comparing `src1` against `src2`.
///
/// The instruction's second source slot only takes a limited register. When
/// `src2` is not limited, the two operands (with their negations and
/// selectors) trade places and both comparison operations go through
/// `invert()`.
///
/// Returns `error.InvalidSourceRegisterCombination` when neither source is
/// limited; errors from descriptor allocation or `addInstruction` propagate.
pub fn cmp(encoder: *Encoder, alloc: Allocator, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, x: Comparison, y: Comparison, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void {
    if (!src1.isLimited() and !src2.isLimited()) {
        return error.InvalidSourceRegisterCombination;
    }

    // `src2` cannot sit in the limited slot, so it becomes the first operand.
    const swapped = !src2.isLimited();

    const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .comparison, .{
        .src1_neg = if (swapped) src2_neg else src1_neg,
        .src1_selector = if (swapped) src2_selector else src1_selector,
        .src2_neg = if (swapped) src1_neg else src2_neg,
        .src2_selector = if (swapped) src1_selector else src2_selector,
    });

    const i_src1 = if (swapped) src2 else src1;
    // The guard above guarantees that whichever register lands here is limited.
    const i_src2 = if (swapped) src1.toLimited().? else src2.toLimited().?;
    const x_cmp = if (swapped) x.invert() else x;
    const y_cmp = if (swapped) y.invert() else y;

    return encoder.addInstruction(alloc, .{ .comparison = .{
        .operand_descriptor_id = descriptor_id,
        .src2 = i_src2,
        .src1 = i_src1,
        .address_component = src_rel,
        .x_operation = x_cmp,
        .y_operation = y_cmp,
        .opcode = Instruction.Opcode.cmp0.toComparison().?,
    } });
}

// There is no separate `madi` entry point: `mad` selects the inverted form itself.

/// Appends a multiply-add instruction.
///
/// `src1` is always limited. Of `src2` and `src3` at least one must be
/// limited, otherwise `error.InvalidSourceRegisterCombination` is returned.
/// When `src2` is limited and `src3` is not, the `mad_inverted` layout with
/// opcode `madi0` is emitted; in every other accepted case the regular `mad`
/// layout with opcode `mad0` is emitted and `src3` takes the limited slot.
pub fn mad(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister.Limited, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src3_neg: Negation, src3: SourceRegister, src3_selector: Selector, src_rel: RelativeComponent) !void {
    if (!src2.isLimited() and !src3.isLimited()) {
        return error.InvalidSourceRegisterCombination;
    }

    const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u5, .full, .{
        .dst_mask = dst_mask,
        .src1_neg = src1_neg,
        .src1_selector = src1_selector,
        .src2_neg = src2_neg,
        .src2_selector = src2_selector,
        .src3_neg = src3_neg,
        .src3_selector = src3_selector,
    });

    // Only `src2` fits the limited slot, so the inverted layout is required.
    const needs_inverted = src2.isLimited() and !src3.isLimited();

    if (needs_inverted) {
        return encoder.addInstruction(alloc, .{ .mad_inverted = .{
            .operand_descriptor_id = descriptor_id,
            .src1 = src1,
            .src2 = src2.toLimited().?,
            .src3 = src3,
            .address_component = src_rel,
            .dst = dest,
            .opcode = Instruction.Opcode.madi0.toMad().?,
        } });
    }

    return encoder.addInstruction(alloc, .{ .mad = .{
        .operand_descriptor_id = descriptor_id,
        .src1 = src1,
        .src2 = src2,
        .src3 = src3.toLimited().?,
        .address_component = src_rel,
        .dst = dest,
        .opcode = Instruction.Opcode.mad0.toMad().?,
    } });
}

test "test?" {
    var fixed: [256]u8 = undefined;
    var fba: std.heap.FixedBufferAllocator = .init(&fixed);
    const alloc = fba.allocator();

    const expected_output: []const u32 = &.{
        0b000000_10000_00_0000000_00001_0000000,
        0b001011_10001_00_0010000_00000_0000000,
        0b001000_10001_00_0000011_01000_0000001,
    };

    var encoder: Encoder = .init;
    defer encoder.deinit(alloc);

    try encoder.add(alloc, .r0, .x, .@"+", .v0, .xyzw, .@"+", .v1, .xyzw, .none);

    // Must have same descriptor as the previous instruction
    try encoder.flr(alloc, .r1, .x, .@"+", .r0, .xyzw, .none);

    // Should create a new descriptor
    try encoder.mul(alloc, .r1, .x, .@"+", .v3, .wyxz, .@"+", .v8, .xxxx, .none);

    // A multi-sequence `for` requires equal lengths, so report a wrong
    // instruction count as an ordinary test failure before iterating.
    try testing.expect(expected_output.len == encoder.instructions.items.len);

    // FIXME: Regression, cannot use this on the 3DS test runner.
    // try testing.expectEqualSlices(u32, expected_output, std.mem.bytesAsSlice(u32, std.mem.sliceAsBytes(encoder.instructions.items)));
    for (expected_output, encoder.instructions.items) |expected, output| {
        try testing.expect(expected == @as(u32, @bitCast(output)));
    }
}

const Encoder = @This();

const std = @import("std");
const testing = std.testing;

const Allocator = std.mem.Allocator;

const zitrus = @import("zitrus");
const shader = zitrus.hardware.pica.shader;

const encoding = shader.encoding;
const Instruction = encoding.Instruction;
const OperandDescriptor = encoding.OperandDescriptor;
const Negation = OperandDescriptor.Negation;
const Condition = encoding.Condition;
const Comparison = encoding.ComparisonOperation;
const Winding = encoding.Winding;
const Primitive = encoding.Primitive;

const Mask = encoding.Component.Mask;
const Selector = encoding.Component.Selector;

const register = shader.register;
const RelativeComponent = register.AddressComponent;
const SourceRegister = register.Source;
const DestinationRegister = register.Destination;

const IntegralRegister = register.Integral;
const BooleanRegister = IntegralRegister.Boolean;
const IntegerRegister =
IntegralRegister.Integer; diff --git a/src/platform/3ds/mango/hardware/pica/shader/as.zig b/src/platform/3ds/mango/hardware/pica/shader/as.zig new file mode 100644 index 0000000..f926eda --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/as.zig @@ -0,0 +1,13 @@ +//! Zitrus PICA200 shader assembler / disassembler. + +pub const Tokenizer = tokenizer.Tokenizer; +pub const Token = tokenizer.Token; +pub const Assembler = @import("as/Assembler.zig"); + +comptime { + _ = Tokenizer; + _ = Token; + _ = Assembler; +} + +const tokenizer = @import("as/tokenizer.zig"); diff --git a/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig b/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig new file mode 100644 index 0000000..dd5e2ea --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig @@ -0,0 +1,1468 @@ +//! Zitrus PICA200 shader assembler. + +// TODO: Last one, we're missing a proper diagnostic for using two floating constant registers (one must be limited) but that is easy! 
/// Token storage: one tag and one start offset per token.
pub const TokenList = std.MultiArrayList(struct {
    tag: Token.Tag,
    start: u32,
});

/// Maps a label name to a 12-bit offset.
pub const LabelMap = std.StringArrayHashMapUnmanaged(u12);
pub const Outputs = std.EnumMap(register.Destination.Output, pica.OutputMap);

pub const FloatingConstants = std.EnumMap(register.Source.Constant, pica.F7_16x4);
pub const IntegerConstants = std.EnumMap(register.Integral.Integer, [4]u8);
pub const BooleanConstants = std.EnumSet(register.Integral.Boolean);

/// Result of `Assembled.assemble`: the token list, the encoded program, the
/// resolved entrypoints and every diagnostic that was recorded.
pub const Assembled = struct {
    /// An entrypoint whose label was resolved to an offset.
    pub const Entrypoint = struct {
        pub const Map = std.StringArrayHashMapUnmanaged(Assembled.Entrypoint);
        pub const Info = union(pica.shader.Type) {
            vertex,
            geometry: pica.shader.Geometry,
        };

        /// Constant values attached to an entrypoint, one container per register kind.
        pub const Constants = struct {
            pub const empty: Constants = .{
                .float = .init(.{}),
                .int = .init(.{}),
                .bool = .init(.{}),
            };

            float: FloatingConstants,
            int: IntegerConstants,
            bool: BooleanConstants,
        };

        info: Info,
        constants: Constants,
        outputs: Outputs,
        /// Offset of the entrypoint's label, as looked up in the label map.
        offset: u16,
    };

    /// The source text handed to `assemble`; `deinit` does not free it.
    source: [:0]const u8,
    tokens: TokenList.Slice,
    entrypoints: Assembled.Entrypoint.Map,
    encoded: Encoder,

    errors: []const Error,

    /// Releases `tokens`, `entrypoints`, `encoded` and `errors` with `gpa`,
    /// then sets `assembled.*` to `undefined`.
    pub fn deinit(assembled: *Assembled, gpa: std.mem.Allocator) void {
        assembled.tokens.deinit(gpa);
        assembled.entrypoints.deinit(gpa);
        assembled.encoded.deinit(gpa);
        gpa.free(assembled.errors);
        assembled.* = undefined;
    }

    /// Returns the tag of the token at `tok_index`.
    pub fn tokenTag(a: Assembled, tok_index: usize) Token.Tag {
        return a.tokens.items(.tag)[tok_index];
    }

    /// Returns the start offset stored for the token at `tok_index`.
    pub fn tokenStart(a: Assembled, tok_index: usize) u32 {
        return a.tokens.items(.start)[tok_index];
    }

    /// Returns the text of the token at `tok_index`.
    ///
    /// Tags that have a fixed lexeme return it directly. Any other token is
    /// re-tokenized from its stored start offset, since only the start is
    /// kept per token; its extent comes from that fresh token's location.
    pub fn tokenSlice(a: Assembled, tok_index: usize) []const u8 {
        const tok_tag = a.tokenTag(tok_index);

        if (tok_tag.lexeme()) |lexeme| {
            return lexeme;
        }

        const tok_start = a.tokenStart(tok_index);
        var tokenizer: shader.as.Tokenizer = .{
            .buffer = a.source,
            .index = tok_start,
        };

        const tok = tokenizer.next();
        std.debug.assert(tok.tag == tok_tag);
        return a.source[tok.loc.start..tok.loc.end];
    }

    /// Tokenizes and assembles `source`.
    ///
    /// The whole source is tokenized up to and including the `.eof` token,
    /// then `passRoot` runs. `passAssemble` and entrypoint resolution only run
    /// when `passRoot` recorded no errors. `error.ParseError` from either pass
    /// is absorbed, with the diagnostics returned through `errors`; every
    /// other error propagates. An entrypoint whose name has no label is
    /// reported with `undefined_label` and left out of the returned map.
    ///
    /// The caller releases the result with `deinit`, passing the same `gpa`.
    pub fn assemble(gpa: std.mem.Allocator, source: [:0]const u8) !Assembled {
        var tokens = TokenList{};
        defer tokens.deinit(gpa);

        {
            var tokenizer: shader.as.Tokenizer = .init(source);

            while (true) {
                const tok = tokenizer.next();

                try tokens.append(gpa, .{
                    .tag = tok.tag,
                    .start = @intCast(tok.loc.start),
                });

                if (tok.tag == .eof) {
                    break;
                }
            }
        }

        var assembler: Assembler = .{
            .gpa = gpa,
            .aliases = .empty,
            .errors = .empty,
            .source = source,
            .tokens = tokens.toOwnedSlice(),
            .encoder = .init,
            .labels = .empty,
            .entrypoints = .empty,
            .tok_i = 0,
            .inst_i = 0,
        };
        defer assembler.deinit(gpa);

        assembler.passRoot() catch |e| switch (e) {
            error.ParseError => {},
            else => return e,
        };

        var entrypoints: Assembled.Entrypoint.Map = .empty;
        errdefer entrypoints.deinit(gpa);

        if (assembler.errors.items.len == 0) assemble: {
            assembler.passAssemble() catch |e| switch (e) {
                error.ParseError => break :assemble,
                else => return e,
            };

            var it = assembler.entrypoints.iterator();
            while (it.next()) |entry| {
                const label_offset = assembler.labels.get(entry.key_ptr.*) orelse {
                    try assembler.warnMsg(.{
                        .tag = .undefined_label,
                        .tok_i = entry.value_ptr.*.tok_i,
                    });

                    continue;
                };

                try entrypoints.put(gpa, entry.key_ptr.*, .{
                    .info = entry.value_ptr.info,
                    .constants = entry.value_ptr.constants,
                    .outputs = entry.value_ptr.outputs,
                    .offset = label_offset,
                });
            }
        }

        // Do the last fallible step before anything is moved out of
        // `assembler`. Initializers run in source order, so taking the error
        // slice inside the literal below would run after `encoder.move()`; a
        // failure there would drop the moved-out encoder with no owner
        // (presumably `move()` transfers ownership; confirm against Encoder).
        const errors = try assembler.errors.toOwnedSlice(gpa);

        return .{
            .source = assembler.source,
            .tokens = assembler.tokens,
            .encoded = assembler.encoder.move(),
            .entrypoints = entrypoints,
            .errors = errors,
        };
    }
};

/// A diagnostic recorded during assembly: its kind (`tag`), the token index
/// it is attached to (`tok_i`) and an expected token tag, which defaults to
/// `.invalid`.
pub const Error = struct {
    tag: Tag,
    tok_i: u32,
    expected_tok: Token.Tag = .invalid,

    pub const Tag = enum {
        unknown_directive,
        invalid_register,
        expected_address_register,
        invalid_address_register_mask,
        expected_address_component,
        cannot_address_relative,

        expected_condition_register,
        invalid_condition_register_mask,

        expected_src_register,
        expected_limited_src_register,
        expected_dst_register,
        expected_bool_register,
        expected_int_register,
        expected_float_register,
        expected_output_register,
        expected_uniform_register,

        invalid_mask,
        swizzled_mask,
        invalid_swizzle,
        cannot_swizzle,

        expected_number,
        number_too_small,
        number_too_big,

        expected_semantic,
        invalid_semantic_component,
        output_has_semantic,

        expected_primitive,
        expected_winding,
        expected_comparison,
        expected_condition,
        expected_boolean,
        expected_shader_type,
        expected_geometry_kind,

        redefined_label,
        undefined_label,
        label_range_too_big,

        redefined_entry,
        undefined_entry,

        expected_directive_or_label_or_mnemonic,
        expected_token,
    };
};

/// Assembler-internal entrypoint record. It carries the same `info`,
/// `constants` and `outputs` as `Assembled.Entrypoint`, plus the token index
/// `tok_i` that `assemble` attaches to an `undefined_label` diagnostic.
const Entrypoint = struct {
    pub const Map = std.StringArrayHashMapUnmanaged(Entrypoint);

    info: Assembled.Entrypoint.Info,
    constants: Assembled.Entrypoint.Constants,
    outputs: Outputs,
    tok_i: u32,
};

const Directive = enum {
    /// .entry