From 33da5c3f218fbb83f23ef407a11b5eae84afbe64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 20:57:42 +0800 Subject: [PATCH 1/7] feat: update Walnut submodule with Vulkan external memory support --- Walnut | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Walnut b/Walnut index 3b8e414..12814d1 160000 --- a/Walnut +++ b/Walnut @@ -1 +1 @@ -Subproject commit 3b8e414fdecfc6c8b58816106fe8d912bd172e31 +Subproject commit 12814d1f7bb19d5752609eb315762b4e2bc6ff2c From 6ce0a4c584a7fe558959ac1b9b4cce04ebd2ec9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:00:15 +0800 Subject: [PATCH 2/7] feat: add VkCUDAInterop RAII wrapper for Vulkan-CUDA external memory sharing --- RayTracing/src/VkCUDAInterop.cpp | 110 +++++++++++++++++++++++++++++++ RayTracing/src/VkCUDAInterop.h | 33 ++++++++++ 2 files changed, 143 insertions(+) create mode 100644 RayTracing/src/VkCUDAInterop.cpp create mode 100644 RayTracing/src/VkCUDAInterop.h diff --git a/RayTracing/src/VkCUDAInterop.cpp b/RayTracing/src/VkCUDAInterop.cpp new file mode 100644 index 0000000..3bcffe2 --- /dev/null +++ b/RayTracing/src/VkCUDAInterop.cpp @@ -0,0 +1,110 @@ +#include "VkCUDAInterop.h" +#include "Walnut/Application.h" +#include +#include + +static uint32_t FindMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) +{ + VkPhysicalDeviceMemoryProperties memProps; + vkGetPhysicalDeviceMemoryProperties(Walnut::Application::GetPhysicalDevice(), &memProps); + for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) + if ((typeFilter & (1 << i)) && + (memProps.memoryTypes[i].propertyFlags & properties) == properties) + return i; + throw std::runtime_error("VkCUDAInterop: no suitable memory type"); +} + +VkCUDAInterop::VkCUDAInterop(uint32_t width, uint32_t height) + : m_Width(width), m_Height(height), m_Size(width * height * sizeof(uint32_t)) +{ + CreateVulkanBuffer(); + ExportToCUDA(); +} + +VkCUDAInterop::~VkCUDAInterop() +{ + VkDevice device = Walnut::Application::GetDevice(); + if (m_CUDADevPtr) cudaFree(m_CUDADevPtr); + if (m_CUDAExtMem) cudaDestroyExternalMemory(m_CUDAExtMem); + if (m_Memory) vkFreeMemory(device, m_Memory, nullptr); + if (m_Buffer) vkDestroyBuffer(device, m_Buffer, nullptr); +} + +void VkCUDAInterop::CreateVulkanBuffer() +{ + VkDevice device = Walnut::Application::GetDevice(); + + VkExternalMemoryBufferCreateInfo extInfo = {}; + extInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO; + extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + VkBufferCreateInfo bufferInfo = {}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.pNext = &extInfo; + bufferInfo.size = m_Size; + bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + if (vkCreateBuffer(device, &bufferInfo, nullptr, &m_Buffer) != VK_SUCCESS) + throw std::runtime_error("VkCUDAInterop: vkCreateBuffer failed"); + + VkMemoryRequirements memReq; + vkGetBufferMemoryRequirements(device, m_Buffer, &memReq); + + VkExportMemoryAllocateInfo exportInfo = {}; + exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + VkMemoryAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.pNext = &exportInfo; + allocInfo.allocationSize = memReq.size; + allocInfo.memoryTypeIndex = FindMemoryType(memReq.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &m_Memory) != VK_SUCCESS) + throw std::runtime_error("VkCUDAInterop: vkAllocateMemory failed"); + + vkBindBufferMemory(device, m_Buffer, m_Memory, 0); +} + +void VkCUDAInterop::ExportToCUDA() +{ + VkDevice device = Walnut::Application::GetDevice(); + + auto vkGetMemoryWin32HandleKHR = (PFN_vkGetMemoryWin32HandleKHR) + vkGetDeviceProcAddr(device, "vkGetMemoryWin32HandleKHR"); + if (!vkGetMemoryWin32HandleKHR) + throw std::runtime_error("VkCUDAInterop: vkGetMemoryWin32HandleKHR not available"); + + VkMemoryGetWin32HandleInfoKHR handleInfo = {}; + handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handleInfo.memory = m_Memory; + handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + + HANDLE handle = nullptr; + if (vkGetMemoryWin32HandleKHR(device, &handleInfo, &handle) != VK_SUCCESS) + throw std::runtime_error("VkCUDAInterop: vkGetMemoryWin32HandleKHR failed"); + + cudaExternalMemoryHandleDesc extMemDesc = {}; + extMemDesc.type = cudaExternalMemoryHandleTypeOpaqueWin32; + extMemDesc.handle.win32.handle = handle; + extMemDesc.size = m_Size; + extMemDesc.flags = cudaExternalMemoryDedicated; + + cudaError_t err = cudaImportExternalMemory(&m_CUDAExtMem, &extMemDesc); + CloseHandle(handle); + if (err != cudaSuccess) + throw std::runtime_error(std::string("VkCUDAInterop: cudaImportExternalMemory failed: ") + cudaGetErrorString(err)); + + cudaExternalMemoryBufferDesc bufDesc = {}; + bufDesc.size = m_Size; + + err = cudaExternalMemoryGetMappedBuffer(&m_CUDADevPtr, m_CUDAExtMem, &bufDesc); + if (err != cudaSuccess) + throw std::runtime_error(std::string("VkCUDAInterop: cudaExternalMemoryGetMappedBuffer failed: ") + cudaGetErrorString(err)); +} + +void VkCUDAInterop::SyncCUDAComplete(cudaStream_t stream) +{ + cudaStreamSynchronize(stream); +} diff --git a/RayTracing/src/VkCUDAInterop.h b/RayTracing/src/VkCUDAInterop.h new file mode 100644 index 0000000..16f9e82 --- /dev/null +++ b/RayTracing/src/VkCUDAInterop.h @@ -0,0 +1,33 @@ +#pragma once + +#define VK_USE_PLATFORM_WIN32_KHR +#include +#include +#include + +class VkCUDAInterop +{ +public: + VkCUDAInterop(uint32_t width, uint32_t height); + ~VkCUDAInterop(); + + void* GetCUDADevicePtr() const { return m_CUDADevPtr; } + VkBuffer GetVulkanBuffer() const { return m_Buffer; } + uint32_t GetWidth() const { return m_Width; } + uint32_t GetHeight() const { return m_Height; } + + void SyncCUDAComplete(cudaStream_t stream); + +private: + void CreateVulkanBuffer(); + void ExportToCUDA(); + + uint32_t m_Width, m_Height; + VkDeviceSize m_Size; + + VkBuffer m_Buffer = VK_NULL_HANDLE; + VkDeviceMemory m_Memory = VK_NULL_HANDLE; + + cudaExternalMemory_t m_CUDAExtMem = nullptr; + void* m_CUDADevPtr = nullptr; +}; From fc24c6bb7f00b33f0be9d3729eff98e528af4414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:03:14 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat:=20add=20interop=20output=20buffer=20s?= =?UTF-8?q?upport=20in=20CUDA=20renderer=20=E2=80=94=20skip=20D2H=20copy?= =?UTF-8?q?=20when=20interop=20active?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RayTracing/src/CUDARenderer.cu | 22 +++++++++++++++++++--- RayTracing/src/CUDARenderer.h | 8 +++++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/RayTracing/src/CUDARenderer.cu b/RayTracing/src/CUDARenderer.cu index 9a656cf..72e2160 100644 --- a/RayTracing/src/CUDARenderer.cu +++ b/RayTracing/src/CUDARenderer.cu @@ -17,7 +17,8 @@ struct CUDARenderState float4* d_SampleBuffer; // Device per-pixel sample buffer (raw path trace output) float4* d_DenoiseBuffer; // Device per-pixel denoise buffer (averaged HDR, for OptiX) float4* d_AccumulationBuffer; // Device accumulation buffer - uint32_t* d_OutputImage; // Device output RGBA buffer + uint32_t* d_OutputImage; // Device output RGBA buffer (legacy, null if interop) + void* d_InteropBuffer; // External device buffer (Vulkan-CUDA interop, nullptr if disabled) GPUSphere* d_Spheres; // Device scene spheres GPUMaterial* d_Materials; // Device scene materials @@ -50,6 +51,7 @@ CUDARenderState* CUDARenderer_Create() state->d_DenoiseBuffer = nullptr; state->d_AccumulationBuffer = nullptr; state->d_OutputImage = nullptr; + state->d_InteropBuffer = nullptr; state->d_Spheres = nullptr; state->d_Materials = nullptr; state->d_RayDirections = nullptr; @@ -278,7 +280,7 @@ void CUDARenderer_Render( state->d_SampleBuffer, state->d_AccumulationBuffer, state->d_DenoiseBuffer, - state->d_OutputImage, + state->d_InteropBuffer ? (uint32_t*)state->d_InteropBuffer : state->d_OutputImage, frameIndex, state->pixelCount ); @@ -349,7 +351,7 @@ void CUDARenderer_DebugFill(CUDARenderState* state) ); DebugFillKernel<<>>( - state->d_OutputImage, + state->d_InteropBuffer ? (uint32_t*)state->d_InteropBuffer : state->d_OutputImage, state->imageWidth, state->imageHeight ); @@ -357,6 +359,20 @@ void CUDARenderer_DebugFill(CUDARenderState* state) cudaDeviceSynchronize(); } +// ─── Vulkan-CUDA Interop API ─── + +void CUDARenderer_SetOutputBuffer(CUDARenderState* state, void* devPtr) +{ + if (!state) return; + state->d_InteropBuffer = devPtr; +} + +void CUDARenderer_Sync(CUDARenderState* state) +{ + if (!state || !state->initialized) return; + cudaStreamSynchronize(state->computeStream); +} + void CUDARenderer_CheckError(const char* file, int line) { cudaError_t err = cudaGetLastError(); diff --git a/RayTracing/src/CUDARenderer.h b/RayTracing/src/CUDARenderer.h index e35d237..73df838 100644 --- a/RayTracing/src/CUDARenderer.h +++ b/RayTracing/src/CUDARenderer.h @@ -42,11 +42,17 @@ void CUDARenderer_SetCameraPosition( // Render one frame (launches CUDA kernel) void CUDARenderer_Render(CUDARenderState* state, uint32_t frameIndex); -// Download output image (device → host) +// Download output image (device → host) — legacy, skipped when interop active void CUDARenderer_GetOutput( CUDARenderState* state, void* hostOutput, uint32_t byteSize); +// Vulkan-CUDA interop: set externally-allocated output buffer (null = use d_OutputImage) +void CUDARenderer_SetOutputBuffer(CUDARenderState* state, void* devPtr); + +// Synchronize CUDA compute stream (replaces blocking GetOutput when interop active) +void CUDARenderer_Sync(CUDARenderState* state); + // Get denoise buffer pointer (float4* HDR, for OptiX denoiser) void* CUDARenderer_GetDenoiseBuffer(CUDARenderState* state); From 2f5cc2f760cd40b8076f784d4c5d24e7330c2524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:10:07 +0800 Subject: [PATCH 4/7] =?UTF-8?q?feat:=20integrate=20VkCUDAInterop=20into=20?= =?UTF-8?q?render=20pipeline=20=E2=80=94=20bypass=20CPU=20copy=20with=20Im?= =?UTF-8?q?Gui=20toggle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RayTracing/src/Renderer.cpp | 102 ++++++++++++++++++++++++++++++++--- RayTracing/src/Renderer.h | 6 +++ RayTracing/src/WalnutApp.cpp | 4 ++ 3 files changed, 105 insertions(+), 7 deletions(-) diff --git a/RayTracing/src/Renderer.cpp b/RayTracing/src/Renderer.cpp index 7067a20..4d38581 100644 --- a/RayTracing/src/Renderer.cpp +++ b/RayTracing/src/Renderer.cpp @@ -2,6 +2,10 @@ #include "Walnut/Random.h" +#ifdef WL_CUDA +#include "Walnut/Application.h" +#endif + #include #include #include @@ -181,6 +185,15 @@ void Renderer::OnResize(uint32_t width, uint32_t height) if (cudaInitialized) { CUDARenderer_OnResize(m_CUDAState, width, height); + + // Recreate interop buffer on resize when enabled + if (m_InteropEnabled) + { + try { m_Interop = std::make_unique(width, height); } + catch (...) { m_Interop.reset(); m_InteropEnabled = false; } + if (m_Interop) + CUDARenderer_SetOutputBuffer(m_CUDAState, m_Interop->GetCUDADevicePtr()); + } } #endif @@ -349,7 +362,50 @@ void Renderer::Render(const Scene& scene, const Camera& camera) #endif // WL_ISPC #endif // !WL_CUDA - m_FinalImage->SetData(m_ImageData); + if (m_InteropEnabled && m_Interop) + { + // Interop path: CUDA wrote to Vulkan buffer, copy to Walnut's VkImage + VkCommandBuffer cmd = Walnut::Application::GetCommandBuffer(true); + VkImage dstImage = m_FinalImage->GetImage(); + + VkImageMemoryBarrier preBarrier = {}; + preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + preBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.image = dstImage; + preBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + preBarrier.srcAccessMask = 0; + preBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &preBarrier); + + VkBufferImageCopy region = {}; + region.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; + region.imageExtent = { m_Interop->GetWidth(), m_Interop->GetHeight(), 1 }; + vkCmdCopyBufferToImage(cmd, m_Interop->GetVulkanBuffer(), dstImage, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + VkImageMemoryBarrier postBarrier = {}; + postBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + postBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + postBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.image = dstImage; + postBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; + postBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + postBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &postBarrier); + + Walnut::Application::FlushCommandBuffer(cmd); + } + else + { + m_FinalImage->SetData(m_ImageData); + } if (m_Settings.Accumulate) m_FrameIndex++; @@ -573,6 +629,31 @@ void Renderer::RenderGPU(const Scene& scene, const Camera& camera) const uint32_t height = m_FinalImage->GetHeight(); if (width == 0 || height == 0) return; + // Sync interop toggle from settings (requires re-creation on enable) + if (m_Settings.EnableInterop != m_InteropEnabled) + { + m_InteropEnabled = m_Settings.EnableInterop; + if (m_InteropEnabled) + { + try + { + m_Interop = std::make_unique(width, height); + CUDARenderer_SetOutputBuffer(m_CUDAState, m_Interop->GetCUDADevicePtr()); + } + catch (...) + { + m_Interop.reset(); + m_InteropEnabled = false; + m_Settings.EnableInterop = false; + } + } + else + { + m_Interop.reset(); + CUDARenderer_SetOutputBuffer(m_CUDAState, nullptr); + } + } + // Upload scene data only when changed (tracked by scene version) if (scene.Version != m_LastSceneVersion) { @@ -631,12 +712,19 @@ void Renderer::RenderGPU(const Scene& scene, const Camera& camera) } #endif - // Download output image from GPU - CUDARenderer_GetOutput( - m_CUDAState, - m_ImageData, - width * height * sizeof(uint32_t) - ); + // Download output image from GPU (skip D2H when interop writes directly to Vulkan) + if (m_InteropEnabled && m_Interop) + { + m_Interop->SyncCUDAComplete((cudaStream_t)CUDARenderer_GetComputeStream(m_CUDAState)); + } + else + { + CUDARenderer_GetOutput( + m_CUDAState, + m_ImageData, + width * height * sizeof(uint32_t) + ); + } } #endif // WL_CUDA diff --git a/RayTracing/src/Renderer.h b/RayTracing/src/Renderer.h index e6d075b..3d9585d 100644 --- a/RayTracing/src/Renderer.h +++ b/RayTracing/src/Renderer.h @@ -12,6 +12,7 @@ #ifdef WL_CUDA #include "CUDARenderer.h" +#include "VkCUDAInterop.h" #endif #include @@ -31,6 +32,9 @@ class Renderer bool SlowRandom = true; #ifdef WL_OPTIX bool EnableDenoising = true; +#endif +#ifdef WL_CUDA + bool EnableInterop = false; #endif }; @@ -97,6 +101,8 @@ class Renderer #ifdef WL_CUDA CUDARenderState* m_CUDAState = nullptr; + std::unique_ptr m_Interop; + bool m_InteropEnabled = false; std::vector m_GPUSpheres; std::vector m_GPUMaterials; std::vector m_GPURayDirs; diff --git a/RayTracing/src/WalnutApp.cpp b/RayTracing/src/WalnutApp.cpp index 95ab374..fac80b9 100644 --- a/RayTracing/src/WalnutApp.cpp +++ b/RayTracing/src/WalnutApp.cpp @@ -104,6 +104,10 @@ class ExampleLayer final : public Walnut::Layer if (ImGui::Checkbox("Denoise", &m_Renderer.GetSettings().EnableDenoising)) m_NeedsRender = true; #endif +#ifdef WL_CUDA + if (ImGui::Checkbox("Vulkan-CUDA Interop", &m_Renderer.GetSettings().EnableInterop)) + m_NeedsRender = true; +#endif if (ImGui::Button("Reset")) { From f666aa31eabc9480716ecc1fe0af1d74ec6a96fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:17:54 +0800 Subject: [PATCH 5/7] fix: remove cudaFree on external memory, add VkMemoryDedicatedAllocateInfo, add buffer memory barrier, guard interop with WL_CUDA --- RayTracing/src/Renderer.cpp | 16 ++++++++++++++++ RayTracing/src/VkCUDAInterop.cpp | 11 ++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/RayTracing/src/Renderer.cpp b/RayTracing/src/Renderer.cpp index 4d38581..a75f315 100644 --- a/RayTracing/src/Renderer.cpp +++ b/RayTracing/src/Renderer.cpp @@ -362,12 +362,27 @@ void Renderer::Render(const Scene& scene, const Camera& camera) #endif // WL_ISPC #endif // !WL_CUDA +#ifdef WL_CUDA if (m_InteropEnabled && m_Interop) { // Interop path: CUDA wrote to Vulkan buffer, copy to Walnut's VkImage VkCommandBuffer cmd = Walnut::Application::GetCommandBuffer(true); VkImage dstImage = m_FinalImage->GetImage(); + // Buffer barrier: external (CUDA) write → Vulkan transfer read + VkBufferMemoryBarrier bufBarrier = {}; + bufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + bufBarrier.srcAccessMask = 0; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufBarrier.buffer = m_Interop->GetVulkanBuffer(); + bufBarrier.size = VK_WHOLE_SIZE; + vkCmdPipelineBarrier(cmd, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 1, &bufBarrier, 0, nullptr); + VkImageMemoryBarrier preBarrier = {}; preBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; preBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -403,6 +418,7 @@ void Renderer::Render(const Scene& scene, const Camera& camera) Walnut::Application::FlushCommandBuffer(cmd); } else +#endif { m_FinalImage->SetData(m_ImageData); } diff --git a/RayTracing/src/VkCUDAInterop.cpp b/RayTracing/src/VkCUDAInterop.cpp index 3bcffe2..89edd45 100644 --- a/RayTracing/src/VkCUDAInterop.cpp +++ b/RayTracing/src/VkCUDAInterop.cpp @@ -24,7 +24,6 @@ VkCUDAInterop::VkCUDAInterop(uint32_t width, uint32_t height) VkCUDAInterop::~VkCUDAInterop() { VkDevice device = Walnut::Application::GetDevice(); - if (m_CUDADevPtr) cudaFree(m_CUDADevPtr); if (m_CUDAExtMem) cudaDestroyExternalMemory(m_CUDAExtMem); if (m_Memory) vkFreeMemory(device, m_Memory, nullptr); if (m_Buffer) vkDestroyBuffer(device, m_Buffer, nullptr); @@ -42,7 +41,7 @@ void VkCUDAInterop::CreateVulkanBuffer() bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.pNext = &extInfo; bufferInfo.size = m_Size; - bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; if (vkCreateBuffer(device, &bufferInfo, nullptr, &m_Buffer) != VK_SUCCESS) throw std::runtime_error("VkCUDAInterop: vkCreateBuffer failed"); @@ -50,13 +49,19 @@ void VkCUDAInterop::CreateVulkanBuffer() VkMemoryRequirements memReq; vkGetBufferMemoryRequirements(device, m_Buffer, &memReq); + VkMemoryDedicatedAllocateInfo dedicatedInfo = {}; + dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + dedicatedInfo.buffer = m_Buffer; + VkExportMemoryAllocateInfo exportInfo = {}; exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + exportInfo.pNext = nullptr; + dedicatedInfo.pNext = &exportInfo; VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocInfo.pNext = &exportInfo; + allocInfo.pNext = &dedicatedInfo; allocInfo.allocationSize = memReq.size; allocInfo.memoryTypeIndex = FindMemoryType(memReq.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); From 677bc2141dbe001b1c845abef7d9603d94e674a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:27:05 +0800 Subject: [PATCH 6/7] refactor: replace Walnut submodule URL with Cle2ment/Walnut fork --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 3bfad55..aaa91ae 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "Walnut"] path = Walnut - url = https://github.com/TheCherno/Walnut + url = https://github.com/Cle2ment/Walnut.git From 7829761db108626455b5fc471520d6139dfa2885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20CHEN?= Date: Thu, 4 Jun 2026 21:47:05 +0800 Subject: [PATCH 7/7] =?UTF-8?q?fix:=20swap=20include=20order=20=E2=80=94?= =?UTF-8?q?=20VkCUDAInterop.h=20before=20CUDARenderer.h=20to=20predefine?= =?UTF-8?q?=20CUDART=5FVERSION,=20resolving=20float3=20redefinition=20on?= =?UTF-8?q?=20CI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RayTracing/src/Renderer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RayTracing/src/Renderer.h b/RayTracing/src/Renderer.h index 3d9585d..8c5b869 100644 --- a/RayTracing/src/Renderer.h +++ b/RayTracing/src/Renderer.h @@ -11,8 +11,8 @@ #endif #ifdef WL_CUDA +#include "VkCUDAInterop.h" // Must precede CUDARenderer.h — defines CUDART_VERSION so float3 guard works #include "CUDARenderer.h" -#include "VkCUDAInterop.h" #endif #include