From 5810ee78fd8e2cfb256cee9063eebbda59666341 Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Thu, 28 May 2026 21:49:53 +0800 Subject: [PATCH 1/6] fix(gpu): loosen shader compatibility checks --- .../main/java/rip/ysm/gpu/BoneSkinShader.java | 9 +++-- .../main/java/rip/ysm/gpu/GpuCapability.java | 37 +++++++++---------- .../main/java/rip/ysm/gpu/IrisRenderPath.java | 2 + common/src/main/resources/bone_skin.fsh | 2 +- common/src/main/resources/bone_skin.vsh | 13 +++---- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/common/src/main/java/rip/ysm/gpu/BoneSkinShader.java b/common/src/main/java/rip/ysm/gpu/BoneSkinShader.java index 4724d1b..bc934f1 100644 --- a/common/src/main/java/rip/ysm/gpu/BoneSkinShader.java +++ b/common/src/main/java/rip/ysm/gpu/BoneSkinShader.java @@ -3,8 +3,9 @@ import com.elfmcys.yesstevemodel.YesSteveModel; import com.elfmcys.yesstevemodel.util.log.ChatLogger; import com.mojang.blaze3d.systems.RenderSystem; +import org.lwjgl.opengl.ARBProgramInterfaceQuery; +import org.lwjgl.opengl.ARBShaderStorageBufferObject; import org.lwjgl.opengl.GL20; -import org.lwjgl.opengl.GL43; public final class BoneSkinShader { public static final int ssbo = 0; @@ -37,9 +38,9 @@ public static synchronized boolean ensureCompiled() { GL20.glBindAttribLocation(p, 4, "a_cullable"); }, vs, fs); - int ssboBlock = GL43.glGetProgramResourceIndex(prog, GL43.GL_SHADER_STORAGE_BLOCK, "BoneBlock"); - if (ssboBlock != GL43.GL_INVALID_INDEX) { - GL43.glShaderStorageBlockBinding(prog, ssboBlock, ssbo); + int ssboBlock = ARBProgramInterfaceQuery.glGetProgramResourceIndex(prog, ARBProgramInterfaceQuery.GL_SHADER_STORAGE_BLOCK, "BoneBlock"); + if (ssboBlock != -1) { + ARBShaderStorageBufferObject.glShaderStorageBlockBinding(prog, ssboBlock, ssbo); } locProj = GL20.glGetUniformLocation(prog, "u_proj"); diff --git a/common/src/main/java/rip/ysm/gpu/GpuCapability.java b/common/src/main/java/rip/ysm/gpu/GpuCapability.java index 49aa969..7bb55cd 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuCapability.java +++ b/common/src/main/java/rip/ysm/gpu/GpuCapability.java @@ -26,16 +26,16 @@ public static synchronized void check() { checked = true; if (System.getProperty("OYSM_DISABLE_GPU") != null) { - reason = "gpu renderer has been disabled"; + unavailable("gpu renderer has been disabled"); return; } if (!NativeLibLoader.isLoaded()) { - reason = "native ysm-core not loaded"; + unavailable("native ysm-core not loaded"); return; } String osName = System.getProperty("os.name", "").toLowerCase(); if (osName.contains("mac") || osName.contains("darwin")) { - reason = "macOS GL is capped at 4.1 and lacks GL_ARB_shader_storage_buffer_object"; + unavailable("macOS GL is capped at 4.1 and lacks GL_ARB_shader_storage_buffer_object"); return; } @@ -52,52 +52,51 @@ public static synchronized void check() { glVendor = GL11.glGetString(GL11.GL_VENDOR); glslVersion = GL11.glGetString(0x8B8C); } catch (Throwable t) { - reason = "GL capabilities not available: " + t.getMessage(); + unavailable("GL capabilities not available: " + t.getMessage()); return; } if (glVersion == null) { - reason = "GL version not available"; + unavailable("GL version not available"); return; } - System.out.println("OpenGL version: " + glVersion); - System.out.println("OpenGL renderer version: " + glRenderer); - System.out.println("OpenGL vendor: " + glVendor); - System.out.println("OpenGL glsl version: " + glslVersion); + if (glVersion.regionMatches(true, 0, "OpenGL ES", 0, "OpenGL ES".length())) { + unavailable("OpenGL ES context is not supported by GPU renderer; desktop OpenGL 4.3 or ARB shader storage buffer support is required (got " + glVersion + ")"); + return; + } if (!caps.OpenGL30) { - reason = "OpenGL 3.0 not supported (got " + glVersion + ")"; + unavailable("OpenGL 3.0 not supported (got " + glVersion + ")"); return; } boolean hasSsbo = caps.OpenGL43 || caps.GL_ARB_shader_storage_buffer_object; boolean hasIfaceQuery = caps.OpenGL43 || caps.GL_ARB_program_interface_query; boolean hasLayoutBinding = caps.OpenGL42 || caps.GL_ARB_shading_language_420pack; - boolean hasExplicitAttrib = caps.OpenGL33 || caps.GL_ARB_explicit_attrib_location; boolean hasPackedNormal = caps.OpenGL33 || caps.GL_ARB_vertex_type_2_10_10_10_rev; if (!hasSsbo) { - reason = "SSBO not supported, GL_VERSION=" + glVersion; + unavailable("SSBO not supported, GL_VERSION=" + glVersion); return; } if (!hasIfaceQuery) { - reason = "GL_ARB_program_interface_query not supported; GL_VERSION=" + glVersion; + unavailable("GL_ARB_program_interface_query not supported; GL_VERSION=" + glVersion); return; } if (!hasLayoutBinding) { - reason = "GL_ARB_shading_language_420pack not supported; GL_VERSION=" + glVersion; - return; - } - if (!hasExplicitAttrib) { - reason = "GL_ARB_explicit_attrib_location not supported; GL_VERSION=" + glVersion; + unavailable("GL_ARB_shading_language_420pack not supported; GL_VERSION=" + glVersion); return; } if (!hasPackedNormal) { - reason = "GL_ARB_vertex_type_2_10_10_10_rev not supported; GL_VERSION=" + glVersion; + unavailable("GL_ARB_vertex_type_2_10_10_10_rev not supported; GL_VERSION=" + glVersion); return; } available = true; reason = "ok (GL " + glVersion + ", " + glRenderer + ")"; } + + private static void unavailable(String unavailableReason) { + reason = unavailableReason; + } } diff --git a/common/src/main/java/rip/ysm/gpu/IrisRenderPath.java b/common/src/main/java/rip/ysm/gpu/IrisRenderPath.java index aac3f45..105a015 100644 --- a/common/src/main/java/rip/ysm/gpu/IrisRenderPath.java +++ b/common/src/main/java/rip/ysm/gpu/IrisRenderPath.java @@ -9,6 +9,7 @@ import net.minecraft.resources.ResourceLocation; import org.joml.Matrix3f; import org.joml.Matrix4f; +import org.lwjgl.opengl.GL; import org.lwjgl.opengl.GL11; import org.lwjgl.opengl.GL15; import org.lwjgl.opengl.GL20; @@ -23,6 +24,7 @@ public final class IrisRenderPath { public static boolean tryRender(GeoModel model, PoseStack.Pose pose, float[] boneParams, int renderPartMask, int packedLight, int packedOverlay, float r, float g, float b, float a, ResourceLocation textureLocation) { if (!GpuCapability.isAvailable()) return false; + if (!GL.getCapabilities().OpenGL43) return false; if (!BoneXformCompute.ensureCompiled()) return false; if (model.bakedBones == null || model.bakedBones.isEmpty()) return false; diff --git a/common/src/main/resources/bone_skin.fsh b/common/src/main/resources/bone_skin.fsh index 4d281d1..844e37f 100644 --- a/common/src/main/resources/bone_skin.fsh +++ b/common/src/main/resources/bone_skin.fsh @@ -1,4 +1,4 @@ -#version 430 core +#version 150 core uniform sampler2D Sampler0; uniform sampler2D Sampler1; diff --git a/common/src/main/resources/bone_skin.vsh b/common/src/main/resources/bone_skin.vsh index de42d64..8615581 100644 --- a/common/src/main/resources/bone_skin.vsh +++ b/common/src/main/resources/bone_skin.vsh @@ -1,13 +1,12 @@ -#version 430 core +#version 150 core #extension GL_ARB_shader_storage_buffer_object : require #extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_explicit_attrib_location : require -layout(location = 0) in vec3 a_position; -layout(location = 1) in vec2 a_uv; -layout(location = 2) in vec4 a_normal; -layout(location = 3) in uint a_boneId; -layout(location = 4) in float a_cullable; +in vec3 a_position; +in vec2 a_uv; +in vec4 a_normal; +in uint a_boneId; +in float a_cullable; out float v_cullable; From 50ce35d74476e661276cf82921fc607797f43301 Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Thu, 28 May 2026 21:50:16 +0800 Subject: [PATCH 2/6] fix(gpu): restore render state after GPU draws --- .../geckolib3/geo/NativeModelRenderer.java | 17 +- .../main/java/rip/ysm/gpu/GpuRenderPath.java | 408 ++++++++++++++---- 2 files changed, 325 insertions(+), 100 deletions(-) diff --git a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java index 88b889b..bc66f34 100644 --- a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java +++ b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java @@ -6,7 +6,6 @@ import com.elfmcys.yesstevemodel.client.renderer.ModelPreviewRenderer; import com.elfmcys.yesstevemodel.config.GeneralConfig; import com.elfmcys.yesstevemodel.geckolib3.geo.render.built.GeoModel; -import com.elfmcys.yesstevemodel.util.log.ChatLogger; import com.mojang.blaze3d.systems.RenderSystem; import com.mojang.blaze3d.vertex.PoseStack; import com.mojang.blaze3d.vertex.VertexConsumer; @@ -37,22 +36,18 @@ public static void renderMesh(VertexConsumer buffer, PoseStack.Pose pose, GeoMod OculusCompat.updatePBRState(); RenderSystem.getProjectionMatrix().mul(RenderSystem.getModelViewMatrix(), projectionModelViewMatrix); boolean isPreview = ModelPreviewRenderer.isPreview() || ModelPreviewRenderer.isExtraPlayer(); + String gpuRenderContext = ModelPreviewRenderer.isExtraPlayer() ? "paperdoll" : (ModelPreviewRenderer.isPreview() ? "preview" : "world"); if (textureLocation != null && NativeLibLoader.isLoaded() && !GeneralConfig.USE_COMPATIBILITY_RENDERER.get() && GeneralConfig.USE_GPU_RENDERER.get()) { - if(!GpuCapability.isAvailable()) - { - ChatLogger.INSTANCE.logFormatted("Disabled GPU renderer for: " + GpuCapability.getReason()); - GeneralConfig.USE_GPU_RENDERER.set(false); - return; - } - - if (OculusCompat.isShaderPackInUse() && !isPreview) { + if (!GpuCapability.isAvailable()) { + GpuRenderPath.debugFallback(gpuRenderContext, GpuCapability.getReason(), renderPartMask, packedLight, textureLocation); + } else if (OculusCompat.isShaderPackInUse() && !isPreview) { if (IrisRenderPath.tryRender(model, pose, boneParams, renderPartMask, packedLight, packedOverlay, red, green, blue, alpha, textureLocation)) { return; } } else { - if (GpuRenderPath.tryRender(model, pose, boneParams, stateBuffer, renderPartMask, packedLight, packedOverlay, red, green, blue, alpha, textureLocation)) { + if (GpuRenderPath.tryRender(model, pose, boneParams, stateBuffer, renderPartMask, packedLight, packedOverlay, red, green, blue, alpha, textureLocation, gpuRenderContext)) { return; } } @@ -296,4 +291,4 @@ public static void nativeRenderModel( // TODO: r, g, b, a ); } -} \ No newline at end of file +} diff --git a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java index f7fc804..e87bcbb 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java +++ b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java @@ -1,5 +1,6 @@ package rip.ysm.gpu; +import com.elfmcys.yesstevemodel.YesSteveModel; import com.elfmcys.yesstevemodel.geckolib3.geo.render.built.GeoModel; import com.elfmcys.yesstevemodel.mixin.client.RenderSystemAccessor; import com.mojang.blaze3d.platform.GlStateManager; @@ -27,6 +28,10 @@ public final class GpuRenderPath { private static final ConcurrentHashMap meshMap = new ConcurrentHashMap<>(); private static final AtomicLong ref = new AtomicLong(1); private static final Matrix4f pivotAbsScratchMat = new Matrix4f(); + private static final boolean DEBUG = Boolean.getBoolean("ysm.gpu.debug"); + private static final int DEBUG_SAMPLE_INTERVAL = 240; + private static int debugSuccessCount; + private static int debugFallbackCount; private static int[] pivotAbsPathScratch = new int[64]; public static boolean tryRender( @@ -40,109 +45,175 @@ public static boolean tryRender( float r, float g, float b, float a, ResourceLocation textureLocation ) { - if (!GpuCapability.isAvailable()) return false; - if (!BoneSkinShader.ensureCompiled()) return false; - if (model.bakedBones == null || model.bakedBones.isEmpty()) return false; + return tryRender(model, pose, boneParams, stateBuffer, renderPartMask, packedLight, packedOverlay, r, g, b, a, textureLocation, "unknown"); + } - if (model.gpuMeshHandle == 0) { - GpuMesh mesh = GpuMeshBuilder.build(model); - if (mesh == null) return false; - model.gpuMeshHandle = encodeMeshRef(mesh); + public static boolean tryRender( + GeoModel model, + PoseStack.Pose pose, + float[] boneParams, + float[] stateBuffer, + int renderPartMask, + int packedLight, + int packedOverlay, + float r, float g, float b, float a, + ResourceLocation textureLocation, + String renderContext + ) { + if (!GpuCapability.isAvailable()) { + debugFallback(renderContext, GpuCapability.getReason(), renderPartMask, packedLight, textureLocation); + return false; + } + if (!BoneSkinShader.ensureCompiled()) { + debugFallback(renderContext, "shader compile failed", renderPartMask, packedLight, textureLocation); + return false; + } + if (model.bakedBones == null || model.bakedBones.isEmpty()) { + debugFallback(renderContext, "empty model", renderPartMask, packedLight, textureLocation); + return false; } - GpuMesh mesh = decodeMeshRef(model.gpuMeshHandle); - if (mesh == null) return false; - - Matrix4f rootPose = pose.pose(); - Matrix3f rootNormal = pose.normal(); - Matrix4f projMat = RenderSystem.getProjectionMatrix(); - Matrix4f mvMat = RenderSystem.getModelViewMatrix(); - - rootPose.get(rootPoseScratch); - rootNormal.get(rootNormalScratch); - projMat.mul(mvMat, projMVScratch); - projMVScratch.get(projScratch); - - ByteBuffer boneBuf = mesh.perFrameBoneBuffer; - boneBuf.clear(); - - updatePivotAbsStateBuffer(model, boneParams, stateBuffer); - - GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); - boneBuf.position(0); - boneBuf.limit(mesh.boneCount * 144); - - RenderSystem.disableCull(); - RenderSystem.enableDepthTest(); - RenderSystem.depthMask(true); - RenderSystem.disableBlend(); - - Minecraft mc = Minecraft.getInstance(); - AbstractTexture modelTex = mc.getTextureManager().getTexture(textureLocation); - int modelTexId = modelTex.getId(); - - GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 2); - mc.gameRenderer.lightTexture().turnOnLightLayer(); - - GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 1); - mc.gameRenderer.overlayTexture().setupOverlayColor(); - GlStateManager._bindTexture(RenderSystem.getShaderTexture(1)); // overlayTexture里的texture没getter,固定bind 1 - - GlStateManager._activeTexture(GL13.GL_TEXTURE0); - GlStateManager._bindTexture(modelTexId); - - GL15.glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, mesh.boneSsbo); - GL15.glBufferSubData(GL43.GL_SHADER_STORAGE_BUFFER, 0L, boneBuf); - GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, mesh.boneSsbo); - - float fogStart = RenderSystem.getShaderFogStart(); - float fogEnd = RenderSystem.getShaderFogEnd(); - float[] fogColor = RenderSystem.getShaderFogColor(); - int fogShape = RenderSystem.getShaderFogShape().getIndex(); - - GlStateManager._glUseProgram(BoneSkinShader.program()); - if (BoneSkinShader.locProj() >= 0) GL20.glUniformMatrix4fv(BoneSkinShader.locProj(), false, projScratch); - if (BoneSkinShader.locColor() >= 0) GL20.glUniform4f(BoneSkinShader.locColor(), r, g, b, a); - if (BoneSkinShader.locOverlay() >= 0) GL20.glUniform1i(BoneSkinShader.locOverlay(), packedOverlay); - if (BoneSkinShader.locFogStart() >= 0) GL20.glUniform1f(BoneSkinShader.locFogStart(), fogStart); - if (BoneSkinShader.locFogEnd() >= 0) GL20.glUniform1f(BoneSkinShader.locFogEnd(), fogEnd); - if (BoneSkinShader.locFogColor() >= 0) - GL20.glUniform4f(BoneSkinShader.locFogColor(), fogColor[0], fogColor[1], fogColor[2], fogColor[3]); + GlStateSnapshot snapshot = GlStateSnapshot.capture(); + int drawCount = 0; + try { + if (model.gpuMeshHandle == 0) { + GpuMesh builtMesh = GpuMeshBuilder.build(model); + if (builtMesh == null) { + debugFallback(renderContext, "mesh build failed", renderPartMask, packedLight, textureLocation); + return false; + } + model.gpuMeshHandle = encodeMeshRef(builtMesh); + } + GpuMesh mesh = decodeMeshRef(model.gpuMeshHandle); + if (mesh == null) { + debugFallback(renderContext, "missing mesh handle", renderPartMask, packedLight, textureLocation); + return false; + } - if (BoneSkinShader.locFogShape() >= 0) GL20.glUniform1i(BoneSkinShader.locFogShape(), fogShape); + Matrix4f rootPose = pose.pose(); + Matrix3f rootNormal = pose.normal(); + Matrix4f projMat = RenderSystem.getProjectionMatrix(); + Matrix4f mvMat = RenderSystem.getModelViewMatrix(); - refreshLights(); + rootPose.get(rootPoseScratch); + rootNormal.get(rootNormalScratch); + projMat.mul(mvMat, projMVScratch); + projMVScratch.get(projScratch); - if (BoneSkinShader.locLight0() >= 0) - GL20.glUniform3f(BoneSkinShader.locLight0(), currentLights[0].x, currentLights[0].y, currentLights[0].z); - if (BoneSkinShader.locLight1() >= 0) - GL20.glUniform3f(BoneSkinShader.locLight1(), currentLights[1].x, currentLights[1].y, currentLights[1].z); + ByteBuffer boneBuf = mesh.perFrameBoneBuffer; + boneBuf.clear(); - GlStateManager._glBindVertexArray(mesh.vao); + updatePivotAbsStateBuffer(model, boneParams, stateBuffer); - int offsetBytes = mesh.indexOffsetBytes(renderPartMask); - int drawCount = mesh.indexDrawCount(renderPartMask); - if (drawCount > 0) { - if (BoneSkinShader.locAlphaMode() >= 0) GL20.glUniform1i(BoneSkinShader.locAlphaMode(), 1); - GL11.glDrawElements(GL11.GL_TRIANGLES, drawCount, GL11.GL_UNSIGNED_INT, offsetBytes); + GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); + boneBuf.position(0); + boneBuf.limit(mesh.boneCount * 144); - RenderSystem.enableBlend(); - RenderSystem.defaultBlendFunc(); - if (BoneSkinShader.locAlphaMode() >= 0) GL20.glUniform1i(BoneSkinShader.locAlphaMode(), 2); - GL11.glDrawElements(GL11.GL_TRIANGLES, drawCount, GL11.GL_UNSIGNED_INT, offsetBytes); + RenderSystem.disableCull(); + RenderSystem.enableDepthTest(); + RenderSystem.depthMask(true); RenderSystem.disableBlend(); - } + GlStateManager._blendFuncSeparate(GL11.GL_SRC_ALPHA, GL11.GL_ONE_MINUS_SRC_ALPHA, GL11.GL_ONE, GL11.GL_ZERO); + GlStateManager._blendEquation(GL14.GL_FUNC_ADD); + GL11.glFrontFace(GL11.GL_CCW); + + Minecraft mc = Minecraft.getInstance(); + AbstractTexture modelTex = mc.getTextureManager().getTexture(textureLocation); + int modelTexId = modelTex.getId(); + + GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 2); + mc.gameRenderer.lightTexture().turnOnLightLayer(); + + GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 1); + mc.gameRenderer.overlayTexture().setupOverlayColor(); + GlStateManager._bindTexture(RenderSystem.getShaderTexture(1)); // overlayTexture里的texture没getter,固定bind 1 + + GlStateManager._activeTexture(GL13.GL_TEXTURE0); + GlStateManager._bindTexture(modelTexId); + + int boneSsbo = mesh.boneSsbo; + GL15.glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, boneSsbo); + GL15.glBufferSubData(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, 0L, boneBuf); + GL30.glBindBufferBase(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, boneSsbo); + + float fogStart = RenderSystem.getShaderFogStart(); + float fogEnd = RenderSystem.getShaderFogEnd(); + float[] fogColor = RenderSystem.getShaderFogColor(); + int fogShape = RenderSystem.getShaderFogShape().getIndex(); + + GlStateManager._glUseProgram(BoneSkinShader.program()); + if (BoneSkinShader.locProj() >= 0) GL20.glUniformMatrix4fv(BoneSkinShader.locProj(), false, projScratch); + if (BoneSkinShader.locColor() >= 0) GL20.glUniform4f(BoneSkinShader.locColor(), r, g, b, a); + if (BoneSkinShader.locOverlay() >= 0) GL20.glUniform1i(BoneSkinShader.locOverlay(), packedOverlay); + if (BoneSkinShader.locFogStart() >= 0) GL20.glUniform1f(BoneSkinShader.locFogStart(), fogStart); + if (BoneSkinShader.locFogEnd() >= 0) GL20.glUniform1f(BoneSkinShader.locFogEnd(), fogEnd); + + if (BoneSkinShader.locFogColor() >= 0) + GL20.glUniform4f(BoneSkinShader.locFogColor(), fogColor[0], fogColor[1], fogColor[2], fogColor[3]); + + if (BoneSkinShader.locFogShape() >= 0) GL20.glUniform1i(BoneSkinShader.locFogShape(), fogShape); + + refreshLights(); + + if (BoneSkinShader.locLight0() >= 0) + GL20.glUniform3f(BoneSkinShader.locLight0(), currentLights[0].x, currentLights[0].y, currentLights[0].z); + if (BoneSkinShader.locLight1() >= 0) + GL20.glUniform3f(BoneSkinShader.locLight1(), currentLights[1].x, currentLights[1].y, currentLights[1].z); + + GlStateManager._glBindVertexArray(mesh.vao); + + int offsetBytes = mesh.indexOffsetBytes(renderPartMask); + drawCount = mesh.indexDrawCount(renderPartMask); + if (drawCount > 0) { + if (BoneSkinShader.locAlphaMode() >= 0) GL20.glUniform1i(BoneSkinShader.locAlphaMode(), 1); + GL11.glDrawElements(GL11.GL_TRIANGLES, drawCount, GL11.GL_UNSIGNED_INT, offsetBytes); + + RenderSystem.enableBlend(); + RenderSystem.defaultBlendFunc(); + if (BoneSkinShader.locAlphaMode() >= 0) GL20.glUniform1i(BoneSkinShader.locAlphaMode(), 2); + GL11.glDrawElements(GL11.GL_TRIANGLES, drawCount, GL11.GL_UNSIGNED_INT, offsetBytes); + RenderSystem.disableBlend(); + } - GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, 0); - GL15.glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, 0); - GlStateManager._glUseProgram(0); + debugSuccess(renderContext, drawCount, renderPartMask, packedLight, textureLocation, snapshot); + return true; + } catch (Throwable t) { + debugFallback(renderContext, "exception: " + t.getClass().getSimpleName() + ": " + t.getMessage(), drawCount, renderPartMask, packedLight, textureLocation, snapshot); + YesSteveModel.LOGGER.error("[YSM GPU] GPU render path failed; falling back for this draw", t); + return false; + } finally { + try { + Minecraft mc = Minecraft.getInstance(); + mc.gameRenderer.overlayTexture().teardownOverlayColor(); + mc.gameRenderer.lightTexture().turnOffLightLayer(); + } catch (Throwable ignored) { + } + GL30.glBindBufferBase(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, 0); + GL15.glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, 0); + GlStateManager._glUseProgram(0); + com.mojang.blaze3d.vertex.BufferUploader.invalidate(); + snapshot.restore(); + } + } - com.mojang.blaze3d.vertex.BufferUploader.invalidate(); - GlStateManager._glBindVertexArray(0); + public static void debugFallback(String renderContext, String reason, int renderPartMask, int packedLight, ResourceLocation textureLocation) { + debugFallback(renderContext, reason, -1, renderPartMask, packedLight, textureLocation, null); + } - mc.gameRenderer.lightTexture().turnOffLightLayer(); + private static void debugSuccess(String renderContext, int drawCount, int renderPartMask, int packedLight, ResourceLocation textureLocation, GlStateSnapshot snapshot) { + if (!DEBUG) return; + int count = ++debugSuccessCount; + if (count > 5 && count % DEBUG_SAMPLE_INTERVAL != 0) return; + YesSteveModel.LOGGER.info("[YSM GPU] render ok context={} drawCount={} partMask={} packedLight={} texture={} stateBefore={}", + renderContext, drawCount, renderPartMask, packedLight, textureLocation, snapshot); + } - return true; + private static void debugFallback(String renderContext, String reason, int drawCount, int renderPartMask, int packedLight, ResourceLocation textureLocation, GlStateSnapshot snapshot) { + if (!DEBUG) return; + int count = ++debugFallbackCount; + if (count > 12 && count % DEBUG_SAMPLE_INTERVAL != 0) return; + YesSteveModel.LOGGER.info("[YSM GPU] fallback context={} reason={} drawCount={} partMask={} packedLight={} texture={} stateBefore={}", + renderContext, reason, drawCount, renderPartMask, packedLight, textureLocation, snapshot); } private static void refreshLights() { @@ -260,4 +331,163 @@ private static void computeOnePivotAbs(int targetIdx, List b localMat.translate(-bone.pivotX / 16.0f, -bone.pivotY / 16.0f, -bone.pivotZ / 16.0f); } } + + private static final class GlStateSnapshot { + private final int program; + private final int vao; + private final int arrayBuffer; + private final int elementArrayBuffer; + private final int shaderStorageBuffer; + private final int shaderStorageBase0; + private final int activeTexture; + private final int texture0; + private final int texture1; + private final int texture2; + private final boolean blend; + private final boolean depthTest; + private final boolean cull; + private final boolean depthMask; + private final int frontFace; + private final int blendSrcRgb; + private final int blendDstRgb; + private final int blendSrcAlpha; + private final int blendDstAlpha; + private final int blendEquationRgb; + private final int blendEquationAlpha; + + private GlStateSnapshot( + int program, + int vao, + int arrayBuffer, + int elementArrayBuffer, + int shaderStorageBuffer, + int shaderStorageBase0, + int activeTexture, + int texture0, + int texture1, + int texture2, + boolean blend, + boolean depthTest, + boolean cull, + boolean depthMask, + int frontFace, + int blendSrcRgb, + int blendDstRgb, + int blendSrcAlpha, + int blendDstAlpha, + int blendEquationRgb, + int blendEquationAlpha + ) { + this.program = program; + this.vao = vao; + this.arrayBuffer = arrayBuffer; + this.elementArrayBuffer = elementArrayBuffer; + this.shaderStorageBuffer = shaderStorageBuffer; + this.shaderStorageBase0 = shaderStorageBase0; + this.activeTexture = activeTexture; + this.texture0 = texture0; + this.texture1 = texture1; + this.texture2 = texture2; + this.blend = blend; + this.depthTest = depthTest; + this.cull = cull; + this.depthMask = depthMask; + this.frontFace = frontFace; + this.blendSrcRgb = blendSrcRgb; + this.blendDstRgb = blendDstRgb; + this.blendSrcAlpha = blendSrcAlpha; + this.blendDstAlpha = blendDstAlpha; + this.blendEquationRgb = blendEquationRgb; + this.blendEquationAlpha = blendEquationAlpha; + } + + static GlStateSnapshot capture() { + int activeTexture = GL11.glGetInteger(GL13.GL_ACTIVE_TEXTURE); + int texture0 = textureBinding(GL13.GL_TEXTURE0); + int texture1 = textureBinding(GL13.GL_TEXTURE0 + 1); + int texture2 = textureBinding(GL13.GL_TEXTURE0 + 2); + GlStateManager._activeTexture(activeTexture); + + return new GlStateSnapshot( + GL11.glGetInteger(GL20.GL_CURRENT_PROGRAM), + GL11.glGetInteger(GL30.GL_VERTEX_ARRAY_BINDING), + GL11.glGetInteger(GL15.GL_ARRAY_BUFFER_BINDING), + GL11.glGetInteger(GL15.GL_ELEMENT_ARRAY_BUFFER_BINDING), + GL11.glGetInteger(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER_BINDING), + GL30.glGetIntegeri(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER_BINDING, BoneSkinShader.ssbo), + activeTexture, + texture0, + texture1, + texture2, + GL11.glIsEnabled(GL11.GL_BLEND), + GL11.glIsEnabled(GL11.GL_DEPTH_TEST), + GL11.glIsEnabled(GL11.GL_CULL_FACE), + GL11.glGetBoolean(GL11.GL_DEPTH_WRITEMASK), + GL11.glGetInteger(GL11.GL_FRONT_FACE), + GL11.glGetInteger(GL14.GL_BLEND_SRC_RGB), + GL11.glGetInteger(GL14.GL_BLEND_DST_RGB), + GL11.glGetInteger(GL14.GL_BLEND_SRC_ALPHA), + GL11.glGetInteger(GL14.GL_BLEND_DST_ALPHA), + GL11.glGetInteger(GL20.GL_BLEND_EQUATION_RGB), + GL11.glGetInteger(GL20.GL_BLEND_EQUATION_ALPHA) + ); + } + + private static int textureBinding(int textureUnit) { + GlStateManager._activeTexture(textureUnit); + return GL11.glGetInteger(GL11.GL_TEXTURE_BINDING_2D); + } + + void restore() { + restoreFlag(blend, RenderSystem::enableBlend, RenderSystem::disableBlend); + restoreFlag(depthTest, RenderSystem::enableDepthTest, RenderSystem::disableDepthTest); + restoreFlag(cull, RenderSystem::enableCull, RenderSystem::disableCull); + GlStateManager._depthMask(depthMask); + GL11.glFrontFace(frontFace); + GL20.glBlendEquationSeparate(blendEquationRgb, blendEquationAlpha); + GlStateManager._blendFuncSeparate(blendSrcRgb, blendDstRgb, blendSrcAlpha, blendDstAlpha); + + GlStateManager._glUseProgram(program); + GlStateManager._glBindVertexArray(vao); + GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, arrayBuffer); + GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, elementArrayBuffer); + GL30.glBindBufferBase(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, shaderStorageBase0); + GlStateManager._glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, shaderStorageBuffer); + + GlStateManager._activeTexture(GL13.GL_TEXTURE0); + GlStateManager._bindTexture(texture0); + GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 1); + GlStateManager._bindTexture(texture1); + GlStateManager._activeTexture(GL13.GL_TEXTURE0 + 2); + GlStateManager._bindTexture(texture2); + GlStateManager._activeTexture(activeTexture); + } + + private static void restoreFlag(boolean enabled, Runnable enable, Runnable disable) { + if (enabled) { + enable.run(); + } else { + disable.run(); + } + } + + @Override + public String toString() { + return "program=" + program + + ",vao=" + vao + + ",arrayBuffer=" + arrayBuffer + + ",elementArrayBuffer=" + elementArrayBuffer + + ",ssbo=" + shaderStorageBuffer + + ",ssbo0=" + shaderStorageBase0 + + ",activeTexture=0x" + Integer.toHexString(activeTexture) + + ",tex0=" + texture0 + + ",tex1=" + texture1 + + ",tex2=" + texture2 + + ",blend=" + blend + + ",depthTest=" + depthTest + + ",cull=" + cull + + ",depthMask=" + depthMask + + ",frontFace=0x" + Integer.toHexString(frontFace); + } + } } From 294410f3d97871b39885f09aca9ad3aefe269470 Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Thu, 28 May 2026 21:50:49 +0800 Subject: [PATCH 3/6] fix(gpu): rotate bone SSBO uploads Use four per-mesh bone buffers so a new upload does not immediately overwrite data that the driver may still be consuming from an earlier draw. The ring relies on normal driver implicit synchronization; no explicit GL fences are added here. --- common/src/main/java/rip/ysm/gpu/GpuMesh.java | 29 +++++++++++++------ .../main/java/rip/ysm/gpu/GpuMeshBuilder.java | 18 ++++++++---- .../main/java/rip/ysm/gpu/GpuRenderPath.java | 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/common/src/main/java/rip/ysm/gpu/GpuMesh.java b/common/src/main/java/rip/ysm/gpu/GpuMesh.java index c1d0e9a..246c71f 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuMesh.java +++ b/common/src/main/java/rip/ysm/gpu/GpuMesh.java @@ -13,6 +13,7 @@ public final class GpuMesh { public final int vbo; public final int ibo; public final int boneSsbo; + public final int[] boneSsbos; public final int vertexCount; public final int indexCount; public final int boneCount; @@ -23,14 +24,16 @@ public final class GpuMesh { private int xformVbo = 0; private int xformVao = 0; + private int nextBoneSsboIndex = 0; private boolean disposed = false; - GpuMesh(long pointer, int vao, int vbo, int ibo, int boneSsbo, int vertexCount, int indexCount, int boneCount, int pm1s, int pm1c, int pm2s, int pm2c, int pm3s, int pm3c) { + GpuMesh(long pointer, int vao, int vbo, int ibo, int[] boneSsbos, int vertexCount, int indexCount, int boneCount, int pm1s, int pm1c, int pm2s, int pm2c, int pm3s, int pm3c) { this.pointer = pointer; this.vao = vao; this.vbo = vbo; this.ibo = ibo; - this.boneSsbo = boneSsbo; + this.boneSsbos = boneSsbos; + this.boneSsbo = boneSsbos[0]; this.vertexCount = vertexCount; this.indexCount = indexCount; this.boneCount = boneCount; @@ -57,6 +60,12 @@ public int indexDrawCount(int renderPartMask) { return self + partMask3Count; } + public int nextBoneSsbo() { + int ssbo = boneSsbos[nextBoneSsboIndex]; + nextBoneSsboIndex = (nextBoneSsboIndex + 1) % boneSsbos.length; + return ssbo; + } + public int xformVbo() { return xformVbo; } @@ -69,9 +78,9 @@ public void ensureXformBuffers() { if (xformVao != 0) return; xformVbo = GlStateManager._glGenBuffers(); GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, xformVbo); - GL45.glBufferData(GL15.GL_ARRAY_BUFFER, (long) vertexCount * 36, GL15.GL_DYNAMIC_DRAW); - xformVao = GL45.glGenVertexArrays(); - GL45.glBindVertexArray(xformVao); + GL15.glBufferData(GL15.GL_ARRAY_BUFFER, (long) vertexCount * 36, GL15.GL_DYNAMIC_DRAW); + xformVao = GL30.glGenVertexArrays(); + GL30.glBindVertexArray(xformVao); GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, xformVbo); GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, ibo); GL20.glEnableVertexAttribArray(0); @@ -86,7 +95,7 @@ public void ensureXformBuffers() { GL30.glVertexAttribIPointer(4, 2, GL11.GL_SHORT, 36, 28L); GL20.glEnableVertexAttribArray(5); GL20.glVertexAttribPointer(5, 3, GL11.GL_BYTE, true, 36, 32L); - GL45.glBindVertexArray(0); + GL30.glBindVertexArray(0); GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, 0); GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0); } @@ -96,10 +105,12 @@ public void dispose() { disposed = true; GlStateManager._glDeleteBuffers(vbo); GlStateManager._glDeleteBuffers(ibo); - GlStateManager._glDeleteBuffers(boneSsbo); - GL45.glDeleteVertexArrays(vao); + for (int ssbo : boneSsbos) { + GlStateManager._glDeleteBuffers(ssbo); + } + GL30.glDeleteVertexArrays(vao); if (xformVbo != 0) GlStateManager._glDeleteBuffers(xformVbo); - if (xformVao != 0) GL45.glDeleteVertexArrays(xformVao); + if (xformVao != 0) GL30.glDeleteVertexArrays(xformVao); if (pointer != 0) { GeoModel.nFreeGpuMesh(pointer); } diff --git a/common/src/main/java/rip/ysm/gpu/GpuMeshBuilder.java b/common/src/main/java/rip/ysm/gpu/GpuMeshBuilder.java index 6c61448..64e2a36 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuMeshBuilder.java +++ b/common/src/main/java/rip/ysm/gpu/GpuMeshBuilder.java @@ -3,12 +3,17 @@ import com.elfmcys.yesstevemodel.geckolib3.geo.render.built.GeoModel; import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.systems.RenderSystem; +import org.lwjgl.opengl.ARBShaderStorageBufferObject; import org.lwjgl.opengl.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; public final class GpuMeshBuilder { + // Four slots cover the typical 2-3 frame GPU pipeline depth plus the current upload. + // This relies on driver-side implicit synchronization instead of explicit GL fences. + private static final int BONE_SSBO_RING_SIZE = 4; + public static GpuMesh build(GeoModel model) { if (model.bakedBones == null || model.bakedBones.isEmpty()) return null; RenderSystem.assertOnRenderThread(); @@ -34,7 +39,7 @@ public static GpuMesh build(GeoModel model) { int vao = GL30.glGenVertexArrays(); int vbo = GlStateManager._glGenBuffers(); int ibo = GlStateManager._glGenBuffers(); - int ssbo = GlStateManager._glGenBuffers(); + int[] boneSsbos = new int[BONE_SSBO_RING_SIZE]; GL30.glBindVertexArray(vao); GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, vbo); @@ -58,12 +63,15 @@ public static GpuMesh build(GeoModel model) { GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, 0); GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0); - GlStateManager._glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, ssbo); - GL45.glBufferData(GL43.GL_SHADER_STORAGE_BUFFER, (long) boneCount * 144, GL15.GL_DYNAMIC_DRAW); - GlStateManager._glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, 0); + for (int i = 0; i < boneSsbos.length; i++) { + boneSsbos[i] = GlStateManager._glGenBuffers(); + GlStateManager._glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, boneSsbos[i]); + GL15.glBufferData(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, (long) boneCount * 144, GL15.GL_DYNAMIC_DRAW); + } + GlStateManager._glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, 0); GeoModel.nReleaseGpuMeshScratch(handle); - return new GpuMesh(handle, vao, vbo, ibo, ssbo, vertexCount, indexCount, boneCount, meta[3], meta[4], meta[5], meta[6], meta[7], meta[8]); + return new GpuMesh(handle, vao, vbo, ibo, boneSsbos, vertexCount, indexCount, boneCount, meta[3], meta[4], meta[5], meta[6], meta[7], meta[8]); } private static ByteBuffer serializeModel(GeoModel model) { diff --git a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java index e87bcbb..4554576 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java +++ b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java @@ -131,7 +131,7 @@ public static boolean tryRender( GlStateManager._activeTexture(GL13.GL_TEXTURE0); GlStateManager._bindTexture(modelTexId); - int boneSsbo = mesh.boneSsbo; + int boneSsbo = mesh.nextBoneSsbo(); GL15.glBindBuffer(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, boneSsbo); GL15.glBufferSubData(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, 0L, boneBuf); GL30.glBindBufferBase(ARBShaderStorageBufferObject.GL_SHADER_STORAGE_BUFFER, BoneSkinShader.ssbo, boneSsbo); From 26a7a1903d693f855a30f3207d43adfd8276bfdc Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Thu, 28 May 2026 23:50:33 +0800 Subject: [PATCH 4/6] feat(gpu): ignore not render to game --- .../yesstevemodel/geckolib3/geo/NativeModelRenderer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java index bc66f34..fb8ac41 100644 --- a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java +++ b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java @@ -38,7 +38,7 @@ public static void renderMesh(VertexConsumer buffer, PoseStack.Pose pose, GeoMod boolean isPreview = ModelPreviewRenderer.isPreview() || ModelPreviewRenderer.isExtraPlayer(); String gpuRenderContext = ModelPreviewRenderer.isExtraPlayer() ? "paperdoll" : (ModelPreviewRenderer.isPreview() ? "preview" : "world"); - if (textureLocation != null && NativeLibLoader.isLoaded() && !GeneralConfig.USE_COMPATIBILITY_RENDERER.get() && GeneralConfig.USE_GPU_RENDERER.get()) { + if (textureLocation != null && NativeLibLoader.isLoaded() && !GeneralConfig.USE_COMPATIBILITY_RENDERER.get() && GeneralConfig.USE_GPU_RENDERER.get() && canDirectRenderTo(buffer)) { if (!GpuCapability.isAvailable()) { GpuRenderPath.debugFallback(gpuRenderContext, GpuCapability.getReason(), renderPartMask, packedLight, textureLocation); @@ -88,6 +88,10 @@ public static void renderMesh(VertexConsumer buffer, PoseStack.Pose pose, GeoMod } } + private static boolean canDirectRenderTo(VertexConsumer buffer) { + return buffer != null && "com.mojang.blaze3d.vertex.BufferBuilder".equals(buffer.getClass().getName()); + } + public static void renderModel( VertexConsumer vertexConsumer, PoseStack.Pose pose, From 79d413a77b61e1a8c7fa390e76733af6598b75d2 Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Fri, 29 May 2026 00:10:23 +0800 Subject: [PATCH 5/6] feat(gpu): send vertex to VertexConsumer --- .../geckolib3/geo/NativeModelRenderer.java | 6 +- common/src/main/java/rip/ysm/gpu/GpuMesh.java | 25 +++ .../main/java/rip/ysm/gpu/GpuRenderPath.java | 206 ++++++++++++++++++ 3 files changed, 236 insertions(+), 1 deletion(-) diff --git a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java index fb8ac41..464ef83 100644 --- a/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java +++ b/common/src/main/java/com/elfmcys/yesstevemodel/geckolib3/geo/NativeModelRenderer.java @@ -38,10 +38,14 @@ public static void renderMesh(VertexConsumer buffer, PoseStack.Pose pose, GeoMod boolean isPreview = ModelPreviewRenderer.isPreview() || ModelPreviewRenderer.isExtraPlayer(); String gpuRenderContext = ModelPreviewRenderer.isExtraPlayer() ? "paperdoll" : (ModelPreviewRenderer.isPreview() ? "preview" : "world"); - if (textureLocation != null && NativeLibLoader.isLoaded() && !GeneralConfig.USE_COMPATIBILITY_RENDERER.get() && GeneralConfig.USE_GPU_RENDERER.get() && canDirectRenderTo(buffer)) { + if (textureLocation != null && NativeLibLoader.isLoaded() && !GeneralConfig.USE_COMPATIBILITY_RENDERER.get() && GeneralConfig.USE_GPU_RENDERER.get()) { if (!GpuCapability.isAvailable()) { GpuRenderPath.debugFallback(gpuRenderContext, GpuCapability.getReason(), renderPartMask, packedLight, textureLocation); + } else if (!canDirectRenderTo(buffer)) { + if (GpuRenderPath.tryRenderToConsumer(buffer, model, pose, boneParams, stateBuffer, renderPartMask, packedLight, packedOverlay, red, green, blue, alpha, textureLocation, gpuRenderContext)) { + return; + } } else if (OculusCompat.isShaderPackInUse() && !isPreview) { if (IrisRenderPath.tryRender(model, pose, boneParams, renderPartMask, packedLight, packedOverlay, red, green, blue, alpha, textureLocation)) { return; diff --git a/common/src/main/java/rip/ysm/gpu/GpuMesh.java b/common/src/main/java/rip/ysm/gpu/GpuMesh.java index 246c71f..e5959b1 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuMesh.java +++ b/common/src/main/java/rip/ysm/gpu/GpuMesh.java @@ -6,6 +6,7 @@ import org.lwjgl.system.MemoryUtil; import java.nio.ByteBuffer; +import java.nio.ByteOrder; public final class GpuMesh { public final long pointer; @@ -24,6 +25,8 @@ public final class GpuMesh { private int xformVbo = 0; private int xformVao = 0; + private ByteBuffer xformReadbackBuffer; + private ByteBuffer indexReadbackBuffer; private int nextBoneSsboIndex = 0; private boolean disposed = false; @@ -100,6 +103,26 @@ public void ensureXformBuffers() { GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0); } + public ByteBuffer xformReadbackBuffer() { + xformReadbackBuffer = ensureReadbackCapacity(xformReadbackBuffer, vertexCount * 36); + return xformReadbackBuffer; + } + + public ByteBuffer indexReadbackBuffer(int indexBytes) { + indexReadbackBuffer = ensureReadbackCapacity(indexReadbackBuffer, indexBytes); + return indexReadbackBuffer; + } + + private static ByteBuffer ensureReadbackCapacity(ByteBuffer buffer, int requiredBytes) { + if (buffer == null || buffer.capacity() < requiredBytes) { + if (buffer != null) MemoryUtil.memFree(buffer); + buffer = MemoryUtil.memAlloc(requiredBytes).order(ByteOrder.nativeOrder()); + } + buffer.clear(); + buffer.limit(requiredBytes); + return buffer; + } + public void dispose() { if (disposed) return; disposed = true; @@ -115,5 +138,7 @@ public void dispose() { GeoModel.nFreeGpuMesh(pointer); } MemoryUtil.memFree(perFrameBoneBuffer); + if (xformReadbackBuffer != null) MemoryUtil.memFree(xformReadbackBuffer); + if (indexReadbackBuffer != null) MemoryUtil.memFree(indexReadbackBuffer); } } diff --git a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java index 4554576..b7c5816 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java +++ b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java @@ -6,6 +6,7 @@ import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.systems.RenderSystem; import com.mojang.blaze3d.vertex.PoseStack; +import com.mojang.blaze3d.vertex.VertexConsumer; import net.minecraft.client.Minecraft; import net.minecraft.client.renderer.texture.AbstractTexture; import net.minecraft.resources.ResourceLocation; @@ -23,6 +24,10 @@ public final class GpuRenderPath { private static final float[] rootPoseScratch = new float[16]; private static final float[] rootNormalScratch = new float[9]; private static final float[] projScratch = new float[16]; + private static final float[] identityScratch = new Matrix4f().get(new float[16]); + /** Byte stride of one vertex in the compute shader (bone_xform.csh) readback output. */ + private static final int READBACK_STRIDE = 36; + private static final int[] quadIndexScratch = new int[6]; private static final Matrix4f projMVScratch = new Matrix4f(); private static final Vector3f[] currentLights = new Vector3f[2]; private static final ConcurrentHashMap meshMap = new ConcurrentHashMap<>(); @@ -196,6 +201,207 @@ public static boolean tryRender( } } + public static boolean tryRenderToConsumer( + VertexConsumer vertexConsumer, + GeoModel model, + PoseStack.Pose pose, + float[] boneParams, + float[] stateBuffer, + int renderPartMask, + int packedLight, + int packedOverlay, + float r, float g, float b, float a, + ResourceLocation textureLocation, + String renderContext + ) { + if (vertexConsumer == null) { + debugFallback(renderContext, "missing vertex consumer", renderPartMask, packedLight, textureLocation); + return false; + } + if (!GpuCapability.isAvailable()) { + debugFallback(renderContext, GpuCapability.getReason(), renderPartMask, packedLight, textureLocation); + return false; + } + if (!GL.getCapabilities().OpenGL43) { + debugFallback(renderContext, "OpenGL 4.3 compute shader unavailable", renderPartMask, packedLight, textureLocation); + return false; + } + if (!BoneXformCompute.ensureCompiled()) { + debugFallback(renderContext, "compute shader compile failed", renderPartMask, packedLight, textureLocation); + return false; + } + if (model.bakedBones == null || model.bakedBones.isEmpty()) { + debugFallback(renderContext, "empty model", renderPartMask, packedLight, textureLocation); + return false; + } + + GlStateSnapshot snapshot = GlStateSnapshot.capture(); + int drawCount = 0; + try { + GpuMesh mesh = getOrBuildMesh(model); + if (mesh == null) { + debugFallback(renderContext, "mesh build failed", renderPartMask, packedLight, textureLocation); + return false; + } + mesh.ensureXformBuffers(); + + Matrix4f rootPose = pose.pose(); + Matrix3f rootNormal = pose.normal(); + rootPose.get(rootPoseScratch); + rootNormal.get(rootNormalScratch); + + ByteBuffer boneBuf = mesh.perFrameBoneBuffer; + boneBuf.clear(); + + updatePivotAbsStateBuffer(model, boneParams, stateBuffer); + + GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); + boneBuf.position(0); + boneBuf.limit(mesh.boneCount * 144); + + int boneSsbo = mesh.nextBoneSsbo(); + GL15.glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, boneSsbo); + GL15.glBufferSubData(GL43.GL_SHADER_STORAGE_BUFFER, 0L, boneBuf); + + GlStateManager._glUseProgram(BoneXformCompute.program()); + if (BoneXformCompute.locColor() >= 0) GL20.glUniform4f(BoneXformCompute.locColor(), r, g, b, a); + if (BoneXformCompute.locOverlay() >= 0) GL20.glUniform1i(BoneXformCompute.locOverlay(), packedOverlay); + if (BoneXformCompute.locModelView() >= 0) { + GL20.glUniformMatrix4fv(BoneXformCompute.locModelView(), false, identityScratch); + } + + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 0, mesh.vbo); + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 1, mesh.xformVbo()); + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 2, boneSsbo); + + GL43.glDispatchCompute(BoneXformCompute.dispatchGroupCount(mesh.vertexCount), 1, 1); + GL42.glMemoryBarrier(GL42.GL_ALL_BARRIER_BITS); + + ByteBuffer vertices = mesh.xformReadbackBuffer(); + GlStateManager._glBindBuffer(GL15.GL_ARRAY_BUFFER, mesh.xformVbo()); + GL15.glGetBufferSubData(GL15.GL_ARRAY_BUFFER, 0L, vertices); + + drawCount = mesh.indexDrawCount(renderPartMask); + if (drawCount > 0) { + ByteBuffer indices = mesh.indexReadbackBuffer(drawCount * Integer.BYTES); + GlStateManager._glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, mesh.ibo); + GL15.glGetBufferSubData(GL15.GL_ELEMENT_ARRAY_BUFFER, mesh.indexOffsetBytes(renderPartMask), indices); + submitReadbackVertices(vertexConsumer, vertices, indices, drawCount); + } + + debugSuccess(renderContext + ":consumer", drawCount, renderPartMask, packedLight, textureLocation, snapshot); + return true; + } catch (Throwable t) { + debugFallback(renderContext, "consumer exception: " + t.getClass().getSimpleName() + ": " + t.getMessage(), drawCount, renderPartMask, packedLight, textureLocation, snapshot); + YesSteveModel.LOGGER.error("[YSM GPU] GPU consumer render path failed; falling back for this draw", t); + return false; + } finally { + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 0, 0); + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 1, 0); + GL43.glBindBufferBase(GL43.GL_SHADER_STORAGE_BUFFER, 2, 0); + GL15.glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, 0); + GlStateManager._glUseProgram(0); + com.mojang.blaze3d.vertex.BufferUploader.invalidate(); + snapshot.restore(); + } + } + + private static void submitReadbackVertices(VertexConsumer vertexConsumer, ByteBuffer vertices, ByteBuffer indices, int drawCount) { + int[] quad = quadIndexScratch; + int i = 0; + for (; i + 5 < drawCount; i += 6) { + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + for (int k = 0; k < 6; k++) { + int idx = indices.getInt((i + k) * Integer.BYTES); + quad[k] = idx; + min = Math.min(min, idx); + max = Math.max(max, idx); + } + // The target RenderType uses QUADS draw mode (4 verts/primitive) while the index + // buffer is triangulated (6 indices/quad). A contiguous run of exactly 4 distinct + // vertices reconstructs into a single quad; otherwise fall back to two triangles. + if (max - min == 3 && coversContiguousQuad(quad, min)) { + if (!isHiddenQuad(vertices, min)) { + submitReadbackVertex(vertexConsumer, vertices, min); + submitReadbackVertex(vertexConsumer, vertices, min + 1); + submitReadbackVertex(vertexConsumer, vertices, min + 2); + submitReadbackVertex(vertexConsumer, vertices, min + 3); + } + continue; + } + + submitReadbackTriangle(vertexConsumer, vertices, quad[0], quad[1], quad[2]); + submitReadbackTriangle(vertexConsumer, vertices, quad[3], quad[4], quad[5]); + } + for (; i + 2 < drawCount; i += 3) { + int idx0 = indices.getInt(i * Integer.BYTES); + int idx1 = indices.getInt((i + 1) * Integer.BYTES); + int idx2 = indices.getInt((i + 2) * Integer.BYTES); + submitReadbackTriangle(vertexConsumer, vertices, idx0, idx1, idx2); + } + } + + private static boolean coversContiguousQuad(int[] indices, int min) { + for (int offset = 0; offset < 4; offset++) { + if (!hasIndex(indices, min + offset)) { + return false; + } + } + return true; + } + + private static boolean hasIndex(int[] indices, int value) { + for (int idx : indices) { + if (idx == value) { + return true; + } + } + return false; + } + + private static boolean isHiddenQuad(ByteBuffer vertices, int min) { + return isHiddenReadbackVertex(vertices, min) + && isHiddenReadbackVertex(vertices, min + 1) + && isHiddenReadbackVertex(vertices, min + 2) + && isHiddenReadbackVertex(vertices, min + 3); + } + + private static void submitReadbackTriangle(VertexConsumer vertexConsumer, ByteBuffer vertices, int idx0, int idx1, int idx2) { + if (isHiddenReadbackVertex(vertices, idx0) && isHiddenReadbackVertex(vertices, idx1) && isHiddenReadbackVertex(vertices, idx2)) { + return; + } + submitReadbackVertex(vertexConsumer, vertices, idx0); + submitReadbackVertex(vertexConsumer, vertices, idx1); + submitReadbackVertex(vertexConsumer, vertices, idx2); + } + + private static boolean isHiddenReadbackVertex(ByteBuffer vertices, int index) { + int base = index * READBACK_STRIDE; + return vertices.getFloat(base) == 2.0f + && vertices.getFloat(base + 4) == 2.0f + && vertices.getFloat(base + 8) == 2.0f; + } + + private static void submitReadbackVertex(VertexConsumer vertexConsumer, ByteBuffer vertices, int index) { + int base = index * READBACK_STRIDE; + float x = vertices.getFloat(base); + float y = vertices.getFloat(base + 4); + float z = vertices.getFloat(base + 8); + float red = (vertices.get(base + 12) & 0xFF) / 255.0f; + float green = (vertices.get(base + 13) & 0xFF) / 255.0f; + float blue = (vertices.get(base + 14) & 0xFF) / 255.0f; + float alpha = (vertices.get(base + 15) & 0xFF) / 255.0f; + float u = vertices.getFloat(base + 16); + float v = vertices.getFloat(base + 20); + int overlay = vertices.getInt(base + 24); + int light = vertices.getInt(base + 28); + float normalX = vertices.get(base + 32) / 127.0f; + float normalY = vertices.get(base + 33) / 127.0f; + float normalZ = vertices.get(base + 34) / 127.0f; + vertexConsumer.vertex(x, y, z, red, green, blue, alpha, u, v, overlay, light, normalX, normalY, normalZ); + } + public static void debugFallback(String renderContext, String reason, int renderPartMask, int packedLight, ResourceLocation textureLocation) { debugFallback(renderContext, reason, -1, renderPartMask, packedLight, textureLocation, null); } From d7b68e888b2c253a81b472cb3a409fba88b29d33 Mon Sep 17 00:00:00 2001 From: MiRinChan <148533509+MiRinChan@users.noreply.github.com> Date: Thu, 28 May 2026 13:40:29 -0400 Subject: [PATCH 6/6] refactor(gpu): dedupe bone matrix upload, simplify quad reconstruction - Extract shared computeBoneMatrices() helper used by both the direct-draw and consumer render paths - Replace coversContiguousQuad/hasIndex scan with an isContiguousQuad bitmask - Remove the now-unused Matrix3f import --- .../main/java/rip/ysm/gpu/GpuRenderPath.java | 78 ++++++++----------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java index b7c5816..d76d8d7 100644 --- a/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java +++ b/common/src/main/java/rip/ysm/gpu/GpuRenderPath.java @@ -10,7 +10,6 @@ import net.minecraft.client.Minecraft; import net.minecraft.client.renderer.texture.AbstractTexture; import net.minecraft.resources.ResourceLocation; -import org.joml.Matrix3f; import org.joml.Matrix4f; import org.joml.Vector3f; import org.lwjgl.opengl.*; @@ -95,24 +94,12 @@ public static boolean tryRender( return false; } - Matrix4f rootPose = pose.pose(); - Matrix3f rootNormal = pose.normal(); Matrix4f projMat = RenderSystem.getProjectionMatrix(); Matrix4f mvMat = RenderSystem.getModelViewMatrix(); - - rootPose.get(rootPoseScratch); - rootNormal.get(rootNormalScratch); projMat.mul(mvMat, projMVScratch); projMVScratch.get(projScratch); - ByteBuffer boneBuf = mesh.perFrameBoneBuffer; - boneBuf.clear(); - - updatePivotAbsStateBuffer(model, boneParams, stateBuffer); - - GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); - boneBuf.position(0); - boneBuf.limit(mesh.boneCount * 144); + ByteBuffer boneBuf = computeBoneMatrices(mesh, model, pose, boneParams, stateBuffer, packedLight); RenderSystem.disableCull(); RenderSystem.enableDepthTest(); @@ -245,19 +232,7 @@ public static boolean tryRenderToConsumer( } mesh.ensureXformBuffers(); - Matrix4f rootPose = pose.pose(); - Matrix3f rootNormal = pose.normal(); - rootPose.get(rootPoseScratch); - rootNormal.get(rootNormalScratch); - - ByteBuffer boneBuf = mesh.perFrameBoneBuffer; - boneBuf.clear(); - - updatePivotAbsStateBuffer(model, boneParams, stateBuffer); - - GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); - boneBuf.position(0); - boneBuf.limit(mesh.boneCount * 144); + ByteBuffer boneBuf = computeBoneMatrices(mesh, model, pose, boneParams, stateBuffer, packedLight); int boneSsbo = mesh.nextBoneSsbo(); GL15.glBindBuffer(GL43.GL_SHADER_STORAGE_BUFFER, boneSsbo); @@ -306,6 +281,24 @@ public static boolean tryRenderToConsumer( } } + /** + * Computes this frame's per-bone skinning matrices into the mesh's reusable bone buffer and + * returns it positioned/limited ready for upload. Shared by the direct-draw and consumer paths. + */ + private static ByteBuffer computeBoneMatrices(GpuMesh mesh, GeoModel model, PoseStack.Pose pose, float[] boneParams, float[] stateBuffer, int packedLight) { + pose.pose().get(rootPoseScratch); + pose.normal().get(rootNormalScratch); + + ByteBuffer boneBuf = mesh.perFrameBoneBuffer; + boneBuf.clear(); + updatePivotAbsStateBuffer(model, boneParams, stateBuffer); + + GeoModel.nComputeBoneMatrices(mesh.pointer, rootPoseScratch, rootNormalScratch, boneParams, packedLight, boneBuf); + boneBuf.position(0); + boneBuf.limit(mesh.boneCount * 144); + return boneBuf; + } + private static void submitReadbackVertices(VertexConsumer vertexConsumer, ByteBuffer vertices, ByteBuffer indices, int drawCount) { int[] quad = quadIndexScratch; int i = 0; @@ -319,9 +312,10 @@ private static void submitReadbackVertices(VertexConsumer vertexConsumer, ByteBu max = Math.max(max, idx); } // The target RenderType uses QUADS draw mode (4 verts/primitive) while the index - // buffer is triangulated (6 indices/quad). A contiguous run of exactly 4 distinct - // vertices reconstructs into a single quad; otherwise fall back to two triangles. - if (max - min == 3 && coversContiguousQuad(quad, min)) { + // buffer is triangulated (6 indices/quad). When the six indices reference exactly the + // four contiguous vertices [min, min+3] they reconstruct into a single quad; otherwise + // fall back to two triangles. + if (max - min == 3 && isContiguousQuad(quad, min)) { if (!isHiddenQuad(vertices, min)) { submitReadbackVertex(vertexConsumer, vertices, min); submitReadbackVertex(vertexConsumer, vertices, min + 1); @@ -342,22 +336,16 @@ private static void submitReadbackVertices(VertexConsumer vertexConsumer, ByteBu } } - private static boolean coversContiguousQuad(int[] indices, int min) { - for (int offset = 0; offset < 4; offset++) { - if (!hasIndex(indices, min + offset)) { - return false; - } - } - return true; - } - - private static boolean hasIndex(int[] indices, int value) { - for (int idx : indices) { - if (idx == value) { - return true; - } + /** + * True when the six triangulated indices reference exactly the four contiguous vertices + * [min, min+3]. The caller guarantees {@code max - min == 3}, so every index sits in that range. + */ + private static boolean isContiguousQuad(int[] quad, int min) { + int seen = 0; + for (int idx : quad) { + seen |= 1 << (idx - min); } - return false; + return seen == 0b1111; } private static boolean isHiddenQuad(ByteBuffer vertices, int min) {