Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions src/latent-preview.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,138 @@
#include "ggml.h"
#include "tensor.hpp"

// Latent-to-RGB projection table for LTXAV latents: 128 rows of 3 floats.
// The preview code selects this table (together with ltxav_latent_rgb_bias)
// when the model version is VERSION_LTXAV, after slicing the latent down to
// its first 128 entries along the channel axis.
// Presumably each row holds the (R, G, B) weights of one latent channel —
// TODO confirm against preview_latent_video.
const float ltxav_latent_rgb_proj[128][3] = {
{-0.0293802f, -0.0362516f, -0.0291386f},
{0.0117735f, 0.0223435f, 0.018856f},
{0.00922335f, 0.0145666f, 0.0038772f},
{0.0227299f, 0.0109122f, 0.0131384f},
{0.00192413f, 0.0024648f, 0.00689245f},
{-0.0105576f, -0.0135933f, -0.00873841f},
{-0.0310222f, -0.0396358f, -0.0408445f},
{0.0149737f, 0.0316323f, 0.03415f},
{0.0027752f, 0.00814889f, 0.0108575f},
{-0.000678017f, -0.00180589f, -0.0161684f},
{0.0153964f, 0.0159774f, 0.0186479f},
{-0.0222799f, -0.0202068f, -0.0181082f},
{0.0128696f, 0.00754416f, -0.00673279f},
{0.0142729f, 0.00448099f, -0.00193934f},
{-0.014066f, -0.0193755f, -0.0160104f},
{-0.0176785f, -0.015903f, -0.0152621f},
{0.0307381f, 0.0292082f, 0.0328668f},
{0.0332928f, 0.0368629f, 0.0440893f},
{0.0186304f, 0.0124069f, 0.0160734f},
{0.00477787f, -0.00315658f, -0.000145702f},
{0.0183099f, 0.0122593f, 0.00599732f},
{-0.0194551f, -0.0183924f, -0.0147465f},
{0.0025732f, 0.00442582f, 0.0173176f},
{-0.0169423f, -0.0293863f, -0.0225908f},
{-0.021228f, -0.0265094f, -0.0253049f},
{0.0327111f, 0.0187133f, 0.0266184f},
{-0.0226425f, -0.0313781f, -0.0414356f},
{-0.0163142f, -0.0146144f, -0.0171793f},
{0.0192183f, 0.0108411f, 0.00829186f},
{-0.032246f, -0.0274846f, -0.0287434f},
{0.00345399f, 0.0115567f, 0.015288f},
{0.000972292f, 0.00331303f, 0.0110501f},
{0.000939494f, -0.00705084f, -0.00979449f},
{0.0405155f, 0.0339534f, 0.0419513f},
{0.0198596f, 0.0186626f, 0.0213766f},
{-0.00982375f, -0.00880439f, -0.00470429f},
{-0.0313707f, -0.0258098f, -0.0211663f},
{0.0144159f, 0.0117896f, 0.0141573f},
{0.0164571f, 0.0149178f, 0.00921599f},
{0.0436184f, 0.0346583f, 0.0360647f},
{-0.00289744f, -0.000752502f, 0.000675415f},
{-0.00621715f, -0.000558851f, 0.0135814f},
{-0.00817579f, -0.0113584f, -0.00556793f},
{0.00965067f, 0.0178221f, 0.015821f},
{0.0211832f, 0.0180827f, 0.0154707f},
{-0.00412858f, -0.00374182f, 0.0029568f},
{-0.0175603f, -0.0226242f, -0.0279012f},
{-0.00437471f, -0.00668329f, 0.000164887f},
{-0.0355983f, -0.0419093f, -0.0383065f},
{0.0144314f, 0.0192514f, 0.0175639f},
{-0.0130693f, -0.00569884f, -0.00341647f},
{-0.00184689f, 0.00189034f, -0.00190561f},
{0.019457f, 0.00842282f, 0.0123738f},
{-0.00477146f, -0.00206932f, 0.00283336f},
{-0.0364544f, -0.0256141f, -0.0322336f},
{-0.0295634f, -0.0295048f, -0.021057f},
{0.0144484f, 0.0191862f, 0.0112445f},
{0.0536406f, 0.0582376f, 0.0570966f},
{0.0085178f, 0.00748455f, 0.00995162f},
{-0.0136637f, -0.0172914f, -0.0195978f},
{-0.0339128f, -0.0392692f, -0.0355216f},
{0.00612855f, 0.00568303f, -0.00212333f},
{-0.0029225f, 0.00668819f, 0.0122131f},
{0.00841843f, 0.000181587f, -0.00650644f},
{-0.00514432f, 0.0127043f, 0.0168049f},
{-0.00997384f, -0.00602262f, -0.0164031f},
{0.0233226f, 0.033254f, 0.0307266f},
{-0.0110201f, -0.0164169f, -0.0161829f},
{-0.0195952f, -0.0177943f, -0.0115377f},
{-0.00523918f, -0.00452043f, 0.00267397f},
{0.0313464f, 0.0288241f, 0.0262496f},
{0.0324018f, 0.0339792f, 0.0312209f},
{-0.0163247f, -0.0230503f, -0.0263239f},
{0.000420577f, -0.00535659f, -0.00663426f},
{-0.012897f, -0.00203767f, -0.000622678f},
{-0.0632956f, -0.0651325f, -0.0584479f},
{-0.00426634f, -0.0150098f, -0.00719348f},
{0.00476109f, 0.00674315f, 0.00895472f},
{0.0129384f, 0.0158352f, 0.00963773f},
{-0.0333379f, -0.0410522f, -0.0317462f},
{0.00344054f, 0.00275915f, 0.00355732f},
{0.0209062f, 0.0273453f, 0.0222967f},
{0.00827287f, 0.00223045f, 0.00325844f},
{-0.0149132f, -0.0183973f, -0.0199781f},
{-0.0100786f, -0.0103681f, -0.00218224f},
{-0.00791409f, -0.00405153f, -0.00599893f},
{0.0176126f, 0.00618342f, -6.6569e-05f},
{0.00942486f, -0.00206494f, -0.00580324f},
{0.00678093f, -0.00291742f, -0.000921195f},
{-0.0221992f, -0.00483162f, -0.000848514f},
{-0.0151587f, -0.0157166f, -0.0107302f},
{0.00909646f, 0.0171985f, 0.0169785f},
{0.0127224f, 0.0170612f, 0.0303428f},
{0.0196562f, 0.00212451f, 0.0127744f},
{0.0233013f, 0.0228994f, 0.0108387f},
{0.00520761f, 0.00992992f, 0.0066267f},
{-3.77736e-05f, 0.00460229f, -0.00475132f},
{-0.0311763f, -0.0453566f, -0.0486901f},
{0.0195798f, 0.0281246f, 0.0180102f},
{-0.0174149f, -0.0240867f, -0.0188785f},
{0.000104658f, 0.00659008f, 0.0144594f},
{-0.00311086f, -0.0241426f, -0.0244164f},
{0.0336462f, 0.0305173f, 0.0331101f},
{0.0613625f, 0.066561f, 0.0610198f},
{-0.0286757f, -0.0325401f, -0.0338036f},
{0.0141534f, 0.0188266f, 0.0253059f},
{-0.00548197f, -0.00170198f, 0.00561745f},
{-0.0117872f, -0.00763218f, -0.0145037f},
{-0.0253304f, -0.0245217f, -0.0144905f},
{-0.00393624f, 0.00350048f, 0.00765561f},
{0.0113625f, 0.00561576f, -0.0113672f},
{-0.0301278f, -0.0261472f, -0.0301903f},
{0.016863f, 0.0173781f, 0.0170916f},
{-0.00495108f, 0.00686749f, 0.00282767f},
{0.00125409f, -0.00378072f, -0.00264117f},
{-0.00264001f, -0.00529772f, -0.0113109f},
{-0.054888f, -0.0575461f, -0.0509146f},
{-0.019442f, -0.0232916f, -0.0258637f},
{0.0133362f, 0.0161808f, 0.00917951f},
{-0.0349002f, -0.0372642f, -0.0466206f},
{-0.00216926f, 0.00208738f, 0.00766492f},
{0.0268528f, 0.0301179f, 0.0228579f},
{0.0226176f, 0.021536f, 0.023152f},
{-0.0110646f, -0.00511349f, -0.0137346f},
{-0.0098424f, -0.00218176f, 0.00414545f},
{0.00200216f, 0.00441732f, -0.0136515f},
{0.00695946f, 0.00313109f, -0.00379435f},
{0.0188377f, 0.0144059f, 0.0229724f},
};
// Three-element bias paired with ltxav_latent_rgb_proj; the preview code
// selects both together for VERSION_LTXAV. Presumably one offset per RGB
// output channel — TODO confirm against preview_latent_video.
// Left non-const because the preview code stores it in a plain `float*`.
// NOTE(review): a non-const, non-inline array defined in a header has external
// linkage; if this header is included from more than one translation unit the
// link would see duplicate definitions — confirm it has a single includer.
float ltxav_latent_rgb_bias[3] = {0.043849f, 0.0201085f, 0.0150286f};

const float wan_21_latent_rgb_proj[16][3] = {
{0.015123f, -0.148418f, 0.479828f},
{0.003652f, -0.010680f, -0.037142f},
Expand Down
23 changes: 19 additions & 4 deletions src/stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1606,17 +1606,32 @@ class StableDiffusionGGML {
void* step_callback_data,
bool is_noisy) {
if (preview_mode == PREVIEW_PROJ) {
sd::Tensor<float> _latents = latents;
int patch_sz = 1;
const float(*latent_rgb_proj)[3] = nullptr;
float* latent_rgb_bias = nullptr;
bool is_video = preview_latent_tensor_is_video(latents);
uint32_t dim = is_video ? static_cast<uint32_t>(latents.shape()[3]) : static_cast<uint32_t>(latents.shape()[2]);
if (version == VERSION_LTXAV) {
if (is_video) {
_latents = sd::ops::slice(_latents, 3, 0, 128);
} else {
_latents = sd::ops::slice(_latents, 2, 0, 128);
}
dim = 128;
}

if (dim == 128) {
if (sd_version_uses_flux2_vae(version)) {
latent_rgb_proj = flux2_latent_rgb_proj;
latent_rgb_bias = flux2_latent_rgb_bias;
patch_sz = 2;
} else if (version == VERSION_LTXAV) {
latent_rgb_proj = ltxav_latent_rgb_proj;
latent_rgb_bias = ltxav_latent_rgb_bias;
} else {
LOG_WARN("No latent to RGB projection known for this model");
return;
}
} else if (dim == 48) {
if (sd_version_is_wan(version)) {
Expand Down Expand Up @@ -1656,13 +1671,13 @@ class StableDiffusionGGML {
return;
}

uint32_t frames = is_video ? static_cast<uint32_t>(latents.shape()[2]) : 1;
uint32_t img_width = static_cast<uint32_t>(latents.shape()[0]) * patch_sz;
uint32_t img_height = static_cast<uint32_t>(latents.shape()[1]) * patch_sz;
uint32_t frames = is_video ? static_cast<uint32_t>(_latents.shape()[2]) : 1;
uint32_t img_width = static_cast<uint32_t>(_latents.shape()[0]) * patch_sz;
uint32_t img_height = static_cast<uint32_t>(_latents.shape()[1]) * patch_sz;

uint8_t* data = (uint8_t*)malloc(frames * img_width * img_height * 3 * sizeof(uint8_t));
GGML_ASSERT(data != nullptr);
preview_latent_video(data, latents, latent_rgb_proj, latent_rgb_bias, patch_sz);
preview_latent_video(data, _latents, latent_rgb_proj, latent_rgb_bias, patch_sz);
sd_image_t* images = (sd_image_t*)malloc(frames * sizeof(sd_image_t));
GGML_ASSERT(images != nullptr);
for (uint32_t i = 0; i < frames; i++) {
Expand Down
Loading