diff --git a/RayTracing/src/CUDARenderer.cuh b/RayTracing/src/CUDARenderer.cuh index 4d3dd80..04dac9e 100644 --- a/RayTracing/src/CUDARenderer.cuh +++ b/RayTracing/src/CUDARenderer.cuh @@ -50,6 +50,80 @@ __device__ inline void BuildONB(const float3& n, float3& u, float3& v, float3& w v = make_float3(b, sign + w.y * w.y * a, -w.y); } +// ────────────────────────────────────────────── +// GGX Microfacet BRDF +// ────────────────────────────────────────────── + +__device__ inline float3 FresnelSchlick(float cosTheta, float3 F0) +{ + float t = fmaxf(1.0f - cosTheta, 0.0f); + float t5 = t * t * t * t * t; + return make_float3( + F0.x + (1.0f - F0.x) * t5, + F0.y + (1.0f - F0.y) * t5, + F0.z + (1.0f - F0.z) * t5 + ); +} + +__device__ inline float GGX_D(float NdotH, float a) +{ + float a2 = a * a; + float d = NdotH * NdotH * (a2 - 1.0f) + 1.0f; + return a2 / (3.14159265358979323846f * d * d); +} + +__device__ inline float GGX_G1(float NdotV, float a) +{ + float a2 = a * a; + float denom = NdotV + sqrtf(NdotV * NdotV * (1.0f - a2) + a2); + return 2.0f * NdotV / fmaxf(denom, 0.0001f); +} + +__device__ inline float GGX_G(float NdotL, float NdotV, float a) +{ + return GGX_G1(NdotL, a) * GGX_G1(NdotV, a); +} + +// VNDF sampling: GGX visible normal distribution +__device__ inline float3 SampleGGX_VNDF(float3 V, float a, float r1, float r2) +{ + float3 Vh = make_float3(a * V.x, a * V.y, V.z); + float vhLen = sqrtf(Vh.x*Vh.x + Vh.y*Vh.y + Vh.z*Vh.z); + Vh = make_float3(Vh.x/vhLen, Vh.y/vhLen, Vh.z/vhLen); + + float3 up = make_float3(0.0f, 0.0f, 1.0f); + float3 T1; + if (Vh.z < 0.9999f) { + T1 = make_float3(-Vh.y, Vh.x, 0.0f); + float t1Len = sqrtf(T1.x*T1.x + T1.y*T1.y); + T1 = make_float3(T1.x/t1Len, T1.y/t1Len, 0.0f); + } else { + T1 = make_float3(1.0f, 0.0f, 0.0f); + } + float3 T2 = make_float3( + T1.y*Vh.z - T1.z*Vh.y, + T1.z*Vh.x - T1.x*Vh.z, + T1.x*Vh.y - T1.y*Vh.x + ); + + float r = sqrtf(r1); + float phi = 2.0f * 3.14159265358979323846f * r2; + float t1 = r * cosf(phi); + float t2 = r * sinf(phi); + float s = 0.5f * (1.0f + Vh.z); + t2 = (1.0f - s) * sqrtf(fmaxf(1.0f - t1 * t1, 0.0f)) + s * t2; + + float3 Nh = make_float3( + t1 * T1.x + t2 * T2.x + sqrtf(fmaxf(1.0f - t1 * t1 - t2 * t2, 0.0f)) * Vh.x, + t1 * T1.y + t2 * T2.y + sqrtf(fmaxf(1.0f - t1 * t1 - t2 * t2, 0.0f)) * Vh.y, + t1 * T1.z + t2 * T2.z + sqrtf(fmaxf(1.0f - t1 * t1 - t2 * t2, 0.0f)) * Vh.z + ); + + float3 unstretched = make_float3(a * Nh.x, a * Nh.y, fmaxf(0.0f, Nh.z)); + float len = sqrtf(unstretched.x*unstretched.x + unstretched.y*unstretched.y + unstretched.z*unstretched.z); + return make_float3(unstretched.x/len, unstretched.y/len, unstretched.z/len); +} + // ────────────────────────────────────────────── // Device-side ray-sphere intersection // ────────────────────────────────────────────── @@ -204,11 +278,6 @@ __device__ inline float3 PerPixel( light.y += contribution.y * emission.y; light.z += contribution.z * emission.z; - // Attenuate by albedo - contribution.x *= material.Albedo.x; - contribution.y *= material.Albedo.y; - contribution.z *= material.Albedo.z; - // Russian roulette: terminate low-contribution paths after 3 bounces if (i > 2) { @@ -228,16 +297,70 @@ __device__ inline float3 PerPixel( payload.WorldPosition.z + payload.WorldNormal.z * 0.0001f ); - // Cosine-weighted diffuse BRDF: sample direction in hemisphere via ONB - float3 u, v, w; - BuildONB(payload.WorldNormal, u, v, w); - float3 localDir = RandomCosineWeightedDirection(seed); + // ── GGX Microfacet BRDF ── + float rough = fmaxf(material.Roughness, 0.001f); + float a = rough * rough; + float a2 = a * a; + + float3 w_o = make_float3(-ray.Direction.x, -ray.Direction.y, -ray.Direction.z); + float3 N = payload.WorldNormal; + float NdotV = N.x*w_o.x + N.y*w_o.y + N.z*w_o.z; + if (NdotV < 0.001f) NdotV = 0.001f; + + float3 F0 = make_float3( + 0.04f + (material.Albedo.x - 0.04f) * material.Metallic, + 0.04f + (material.Albedo.y - 0.04f) * material.Metallic, + 0.04f + (material.Albedo.z - 0.04f) * material.Metallic + ); + + // ── Sample H from GGX VNDF (visible normals — guarantees WoDotH > 0) ── + float3 u_t, v_t, w_t; + BuildONB(N, u_t, v_t, w_t); + float3 localWo = make_float3( + u_t.x*w_o.x + u_t.y*w_o.y + u_t.z*w_o.z, + v_t.x*w_o.x + v_t.y*w_o.y + v_t.z*w_o.z, + w_t.x*w_o.x + w_t.y*w_o.y + w_t.z*w_o.z + ); + float3 localH = SampleGGX_VNDF(localWo, a, RandomFloat(seed), RandomFloat(seed)); + float NdotH = fmaxf(localH.z, 0.001f); + float WoDotH = localWo.x*localH.x + localWo.y*localH.y + localWo.z*localH.z; + + // Reflect in local frame: wi = 2*(wo·H)*H - wo + float3 localWi = make_float3( + 2.0f * WoDotH * localH.x - localWo.x, + 2.0f * WoDotH * localH.y - localWo.y, + 2.0f * WoDotH * localH.z - localWo.z + ); + float NdotL = fmaxf(localWi.z, 0.001f); - ray.Direction = make_float3( - u.x * localDir.x + v.x * localDir.y + w.x * localDir.z, - u.y * localDir.x + v.y * localDir.y + w.y * localDir.z, - u.z * localDir.x + v.z * localDir.y + w.z * localDir.z + // Transform wi back to world + float3 wi = make_float3( + u_t.x*localWi.x + v_t.x*localWi.y + w_t.x*localWi.z, + u_t.y*localWi.x + v_t.y*localWi.y + w_t.y*localWi.z, + u_t.z*localWi.x + v_t.z*localWi.y + w_t.z*localWi.z ); + + // ── Evaluate GGX BRDF ── + float D = a2 / (3.14159265358979323846f * (NdotH*NdotH*(a2-1.0f)+1.0f) * (NdotH*NdotH*(a2-1.0f)+1.0f)); + float G1_v = 2.0f*NdotV / (NdotV + sqrtf(NdotV*NdotV*(1.0f-a2)+a2)); + float G1_l = 2.0f*NdotL / (NdotL + sqrtf(NdotL*NdotL*(1.0f-a2)+a2)); + float G = G1_v * G1_l; + float3 F = FresnelSchlick(WoDotH, F0); + + float3 spec = make_float3(D*G*F.x/(4.0f*NdotL*NdotV), D*G*F.y/(4.0f*NdotL*NdotV), D*G*F.z/(4.0f*NdotL*NdotV)); + float3 kD = make_float3((1.0f-F.x)*(1.0f-material.Metallic), (1.0f-F.y)*(1.0f-material.Metallic), (1.0f-F.z)*(1.0f-material.Metallic)); + float3 diff = make_float3(kD.x*material.Albedo.x/3.14159265358979323846f, kD.y*material.Albedo.y/3.14159265358979323846f, kD.z*material.Albedo.z/3.14159265358979323846f); + + // PDF for NDF-sampled H: pdf(wi) = D*NdotH / (4*WoDotH) + float pdf = fmaxf(D * NdotH / (4.0f * WoDotH), 0.001f); + + // Combined BSDF * cosθ / pdf + contribution.x *= (spec.x + diff.x) * NdotL / pdf; + contribution.y *= (spec.y + diff.y) * NdotL / pdf; + contribution.z *= (spec.z + diff.z) * NdotL / pdf; + + float wiLen = sqrtf(wi.x*wi.x + wi.y*wi.y + wi.z*wi.z); + ray.Direction = make_float3(wi.x/wiLen, wi.y/wiLen, wi.z/wiLen); } return light; diff --git a/RayTracing/src/PathTracer.ispc b/RayTracing/src/PathTracer.ispc index 81bcbbb..36c1c72 100644 --- a/RayTracing/src/PathTracer.ispc +++ b/RayTracing/src/PathTracer.ispc @@ -52,6 +52,75 @@ static inline void normalize3(varying float v[3]) { } } +// ── GGX Microfacet BRDF ── + +static inline void FresnelSchlick3(float cosTheta, varying float F0[3], varying float out[3]) { + varying float t = max(1.0f - cosTheta, 0.0f); + varying float t5 = t * t * t * t * t; + out[0] = F0[0] + (1.0f - F0[0]) * t5; + out[1] = F0[1] + (1.0f - F0[1]) * t5; + out[2] = F0[2] + (1.0f - F0[2]) * t5; +} + +static inline float GGX_D(float NdotH, float a) { + float a2 = a * a; + float d = NdotH * NdotH * (a2 - 1.0f) + 1.0f; + return a2 / (3.14159265358979323846f * d * d); +} + +static inline float GGX_G1(float NdotV, float a) { + float a2 = a * a; + float denom = NdotV + sqrt(NdotV * NdotV * (1.0f - a2) + a2); + return 2.0f * NdotV / max(denom, 0.0001f); +} + +static inline float GGX_G(float NdotL, float NdotV, float a) { + return GGX_G1(NdotL, a) * GGX_G1(NdotV, a); +} + +static inline void SampleGGX_VNDF( + varying float Vx, varying float Vy, varying float Vz, + float a, varying float r1, varying float r2, + varying float out[3] +) { + // Stretch V + varying float Vhx = a * Vx, Vhy = a * Vy, Vhz = Vz; + varying float vhLen = sqrt(Vhx*Vhx + Vhy*Vhy + Vhz*Vhz); + Vhx /= vhLen; Vhy /= vhLen; Vhz /= vhLen; + + // T1 = normalize(cross(up, Vh)) + varying float T1x, T1y, T1z; + if (Vhz < 0.9999f) { + T1x = -Vhy; T1y = Vhx; T1z = 0.0f; + varying float t1Len = sqrt(T1x*T1x + T1y*T1y); + T1x /= t1Len; T1y /= t1Len; + } else { + T1x = 1.0f; T1y = 0.0f; T1z = 0.0f; + } + + // T2 = cross(Vh, T1) + varying float T2x = Vhy*T1z - Vhz*T1y; + varying float T2y = Vhz*T1x - Vhx*T1z; + varying float T2z = Vhx*T1y - Vhy*T1x; + + varying float r = sqrt(r1); + varying float phi = 2.0f * 3.14159265358979323846f * r2; + varying float t1 = r * cos(phi); + varying float t2 = r * sin(phi); + varying float s = 0.5f * (1.0f + Vhz); + t2 = (1.0f - s) * sqrt(max(1.0f - t1*t1, 0.0f)) + s * t2; + + varying float nhx = t1*T1x + t2*T2x + sqrt(max(1.0f - t1*t1 - t2*t2, 0.0f)) * Vhx; + varying float nhy = t1*T1y + t2*T2y + sqrt(max(1.0f - t1*t1 - t2*t2, 0.0f)) * Vhy; + varying float nhz = t1*T1z + t2*T2z + sqrt(max(1.0f - t1*t1 - t2*t2, 0.0f)) * Vhz; + + // Unstretch + out[0] = a * nhx; + out[1] = a * nhy; + out[2] = max(0.0f, nhz); + normalize3(out); +} + // ── Sphere Intersection (matches Renderer.cpp TraceRay exactly) ── // Returns: hitDistance (>0 = hit, <0 = miss), and sets closestSphere index // The quadratic: a*t^2 + b*t + c = 0 where a = dot(dir,dir) ≈ 1 @@ -182,16 +251,11 @@ export void ISPCRenderPixels( varying float alG = matAlbedoG[hmi]; varying float alB = matAlbedoB[hmi]; - // ── Accumulate emission (BEFORE albedo attenuation) ── + // ── Accumulate emission ── lightR += contribR * emR; lightG += contribG * emG; lightB += contribB * emB; - // ── Attenuate contribution by albedo ── - contribR *= alR; - contribG *= alG; - contribB *= alB; - // ── Russian roulette (after 3 guaranteed bounces) ── if (bounce > 2) { varying float p = contribR; @@ -214,22 +278,75 @@ export void ISPCRenderPixels( rOrigY = hitPosY + normY * 0.0001f; rOrigZ = hitPosZ + normZ * 0.0001f; - // ── Diffuse BRDF: sample random direction in hemisphere ── - varying float randDir[3]; - RandomInUnitSphere(randDir, seed); - - rDirX = normX + randDir[0]; - rDirY = normY + randDir[1]; - rDirZ = normZ + randDir[2]; - - // Normalize - varying float dirLenSq = rDirX * rDirX + rDirY * rDirY + rDirZ * rDirZ; - if (dirLenSq > 0.0f) { - varying float invLen = rsqrt(dirLenSq); - rDirX *= invLen; - rDirY *= invLen; - rDirZ *= invLen; + // Load roughness + metallic for GGX + varying float roughVal = max(matRoughness[hmi], 0.001f); + varying float metalVal = matMetallic[hmi]; + varying float a = roughVal * roughVal; + + // ── GGX Microfacet BRDF ── + // Outgoing direction (pointing away from surface) + varying float woX = -rDirX, woY = -rDirY, woZ = -rDirZ; + + // Fresnel F0 + varying float F0[3]; + F0[0] = 0.04f + (alR - 0.04f) * metalVal; + F0[1] = 0.04f + (alG - 0.04f) * metalVal; + F0[2] = 0.04f + (alB - 0.04f) * metalVal; + + float NdotV = max(dot3(normX, normY, normZ, woX, woY, woZ), 0.001f); + + varying float rn1 = RandomFloat(seed), rn2 = RandomFloat(seed); + varying float H[3]; + SampleGGX_VNDF(woX, woY, woZ, a, rn1, rn2, H); + float NdotH = max(dot3(normX, normY, normZ, H[0], H[1], H[2]), 0.001f); + + // Reflect outgoing around H using dot(wo, H) + varying float WoDotH_signed = woX*H[0] + woY*H[1] + woZ*H[2]; + varying float wiX = 2.0f * WoDotH_signed * H[0] - woX; + varying float wiY = 2.0f * WoDotH_signed * H[1] - woY; + varying float wiZ = 2.0f * WoDotH_signed * H[2] - woZ; + { + varying float wiLenSq = wiX*wiX + wiY*wiY + wiZ*wiZ; + if (wiLenSq > 0.0f) { + varying float inv = rsqrt(wiLenSq); + wiX *= inv; wiY *= inv; wiZ *= inv; + } } + float NdotL = max(dot3(normX, normY, normZ, wiX, wiY, wiZ), 0.001f); + + // WoDotH for Fresnel/PDF (absolute value) + varying float WoDotH = WoDotH_signed; + if (WoDotH < 0.0f) WoDotH = -WoDotH; + + float D = GGX_D(NdotH, a); + float G = GGX_G(NdotL, NdotV, a); + varying float F[3]; + FresnelSchlick3(WoDotH, F0, F); + + varying float specR = D * G * F[0] / (4.0f * NdotL * NdotV + 0.001f); + varying float specG = D * G * F[1] / (4.0f * NdotL * NdotV + 0.001f); + varying float specB = D * G * F[2] / (4.0f * NdotL * NdotV + 0.001f); + + varying float kdR = (1.0f - F[0]) * (1.0f - metalVal); + varying float kdG = (1.0f - F[1]) * (1.0f - metalVal); + varying float kdB = (1.0f - F[2]) * (1.0f - metalVal); + varying float diffR = kdR * alR / 3.14159265358979323846f; + varying float diffG = kdG * alG / 3.14159265358979323846f; + varying float diffB = kdB * alB / 3.14159265358979323846f; + + varying float pdf = max(D * NdotH / (4.0f * WoDotH + 0.001f), 0.001f); + + varying float bsdfR = (specR + diffR) * NdotL; + varying float bsdfG = (specG + diffG) * NdotL; + varying float bsdfB = (specB + diffB) * NdotL; + + contribR *= bsdfR / pdf; + contribG *= bsdfG / pdf; + contribB *= bsdfB / pdf; + + rDirX = wiX; + rDirY = wiY; + rDirZ = wiZ; } // ── Write output ── diff --git a/RayTracing/src/Renderer.cpp b/RayTracing/src/Renderer.cpp index 465f984..7067a20 100644 --- a/RayTracing/src/Renderer.cpp +++ b/RayTracing/src/Renderer.cpp @@ -54,6 +54,51 @@ namespace Utils RandomFloat(seed) * 2.0f - 1.0f )); } + + // ── GGX Microfacet ── + static glm::vec3 FresnelSchlick(float cosTheta, const glm::vec3& F0) + { + float t = glm::max(1.0f - cosTheta, 0.0f); + float t5 = t * t * t * t * t; + return F0 + (glm::vec3(1.0f) - F0) * t5; + } + + static float GGX_D(float NdotH, float a) + { + float a2 = a * a; + float d = NdotH * NdotH * (a2 - 1.0f) + 1.0f; + return a2 / (glm::pi() * d * d); + } + + static float GGX_G1(float NdotV, float a) + { + float a2 = a * a; + float denom = NdotV + glm::sqrt(NdotV * NdotV * (1.0f - a2) + a2); + return 2.0f * NdotV / glm::max(denom, 0.0001f); + } + + static float GGX_G(float NdotL, float NdotV, float a) + { + return GGX_G1(NdotL, a) * GGX_G1(NdotV, a); + } + + static glm::vec3 SampleGGX_VNDF(const glm::vec3& V, float a, float r1, float r2) + { + glm::vec3 Vh = glm::normalize(glm::vec3(a * V.x, a * V.y, V.z)); + glm::vec3 up(0.0f, 0.0f, 1.0f); + glm::vec3 T1 = (Vh.z < 0.9999f) ? glm::normalize(glm::cross(up, Vh)) : glm::vec3(1.0f, 0.0f, 0.0f); + glm::vec3 T2 = glm::cross(Vh, T1); + + float r = glm::sqrt(r1); + float phi = 2.0f * glm::pi() * r2; + float t1 = r * glm::cos(phi); + float t2 = r * glm::sin(phi); + float s = 0.5f * (1.0f + Vh.z); + t2 = (1.0f - s) * glm::sqrt(glm::max(1.0f - t1 * t1, 0.0f)) + s * t2; + + glm::vec3 Nh = t1 * T1 + t2 * T2 + glm::sqrt(glm::max(1.0f - t1*t1 - t2*t2, 0.0f)) * Vh; + return glm::normalize(glm::vec3(a * Nh.x, a * Nh.y, glm::max(0.0f, Nh.z))); + } } #endif // !WL_CUDA @@ -361,7 +406,6 @@ glm::vec4 Renderer::PerPixel(uint32_t x, uint32_t y) const // This order matches the GPU path (CUDARenderer.cuh:193-200) and the // path tracing integral: Le * ∏(previous BSDFs) light += contribution * material.GetEmission(); - contribution *= material.Albedo; // Russian roulette: probabilistically terminate low-contribution paths (after 3 guaranteed bounces) if (i > 2) @@ -373,10 +417,35 @@ glm::vec4 Renderer::PerPixel(uint32_t x, uint32_t y) const } ray.Origin = WorldPosition + WorldNormal * 0.0001f; - if (m_Settings.SlowRandom) - ray.Direction = glm::normalize(WorldNormal + Walnut::Random::InUnitSphere()); - else - ray.Direction = glm::normalize(WorldNormal + Utils::InUnitSphere(seed)); + + // ── GGX Microfacet BRDF ── + const glm::vec3 w_o = -ray.Direction; + const glm::vec3 F0 = glm::mix(glm::vec3(0.04f), material.Albedo, material.Metallic); + const float rough = glm::max(material.Roughness, 0.001f); + const float a = rough * rough; + + const float r1 = Utils::RandomFloat(seed), r2 = Utils::RandomFloat(seed); + const glm::vec3 H = Utils::SampleGGX_VNDF(w_o, a, r1, r2); + const float NdotH = glm::max(glm::dot(WorldNormal, H), 0.001f); + const glm::vec3 wi = glm::reflect(-w_o, H); + const float NdotL = glm::max(glm::dot(WorldNormal, wi), 0.001f); + const float NdotV = glm::max(glm::dot(WorldNormal, w_o), 0.001f); + const float WoDotH = glm::abs(glm::dot(w_o, H)); + + const float D = Utils::GGX_D(NdotH, a); + const float G = Utils::GGX_G(NdotL, NdotV, a); + const glm::vec3 F = Utils::FresnelSchlick(WoDotH, F0); + + const glm::vec3 specBRDF = D * G * F / (4.0f * NdotL * NdotV + 0.001f); + const glm::vec3 kD = (glm::vec3(1.0f) - F) * (1.0f - material.Metallic); + const glm::vec3 diffBRDF = kD * material.Albedo / glm::pi(); + + const float pdf = glm::max(D * NdotH / (4.0f * WoDotH + 0.001f), 0.001f); + + const glm::vec3 bsdf = (specBRDF + diffBRDF) * NdotL; + contribution *= bsdf / pdf; + + ray.Direction = glm::normalize(wi); } return { light, 1.0f }; }