diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 572481889..c59a9733f 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -4,11 +4,13 @@ #include #include #include +#include #include #include #include "ggml_extend.hpp" #include "gits_noise.inl" +#include "guidance.h" #include "tensor.hpp" /*================================================= CompVisDenoiser ==================================================*/ @@ -894,7 +896,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser { } }; -typedef std::function(const sd::Tensor&, float, int, sd::Tensor*)> denoise_cb_t; +typedef std::function&, float, int)> denoise_cb_t; static std::pair get_ancestral_step(float sigma_from, float sigma_to, @@ -972,11 +974,11 @@ static sd::Tensor sample_euler_ancestral(denoise_cb_t model, for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; float sigma_to = sigmas[i + 1]; - auto denoised_opt = model(x, sigma, i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); if (sigma_to == 0.f) { x = denoised; } else if (eta == 0.f) { @@ -1003,11 +1005,11 @@ static sd::Tensor sample_euler(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d = (x - denoised) / sigma; x += d * (sigmas[i + 1] - sigma); } @@ -1019,22 +1021,22 @@ static sd::Tensor sample_heun(denoise_cb_t model, const std::vector& sigmas) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], -(i + 1)); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d = (x - denoised) / sigmas[i]; float dt = sigmas[i + 1] - sigmas[i]; if (sigmas[i + 1] == 0) { x += d * dt; } else { sd::Tensor x2 = x + d * dt; - auto denoised2_opt = model(x2, sigmas[i + 1], i + 1, nullptr); - if (denoised2_opt.empty()) { + auto denoised2_opt = model(x2, sigmas[i + 1], i + 1); + if (denoised2_opt.pred.empty()) { return {}; } - sd::Tensor denoised2 = std::move(denoised2_opt); + sd::Tensor denoised2 = std::move(denoised2_opt.pred); d = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f; x += d * dt; } @@ -1047,11 +1049,11 @@ static sd::Tensor sample_dpm2(denoise_cb_t model, const std::vector& sigmas) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], -(i + 1)); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d = (x - denoised) / sigmas[i]; if (sigmas[i + 1] == 0) { x += d * (sigmas[i + 1] - sigmas[i]); @@ -1060,11 +1062,11 @@ static sd::Tensor sample_dpm2(denoise_cb_t model, float dt_1 = sigma_mid - sigmas[i]; float dt_2 = sigmas[i + 1] - sigmas[i]; sd::Tensor x2 = x + d * dt_1; - auto denoised2_opt = model(x2, sigma_mid, i + 1, nullptr); - if (denoised2_opt.empty()) { + auto denoised2_opt = model(x2, sigma_mid, i + 1); + if (denoised2_opt.pred.empty()) { return {}; } - sd::Tensor denoised2 = std::move(denoised2_opt); + sd::Tensor denoised2 = std::move(denoised2_opt.pred); x += ((x2 - denoised2) / sigma_mid) * dt_2; } } @@ -1081,11 +1083,11 @@ static sd::Tensor sample_dpmpp_2s_ancestral(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], -(i + 1)); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); if (sigma_down == 0) { @@ -1097,11 +1099,11 @@ static sd::Tensor sample_dpmpp_2s_ancestral(denoise_cb_t model, float s = t + 0.5f * h; float sigma_s = sigma_fn(s); sd::Tensor x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised; - auto denoised2_opt = model(x2, sigma_s, i + 1, nullptr); - if (denoised2_opt.empty()) { + auto denoised2_opt = model(x2, sigma_s, i + 1); + if (denoised2_opt.pred.empty()) { return {}; } - sd::Tensor denoised2 = std::move(denoised2_opt); + sd::Tensor denoised2 = std::move(denoised2_opt.pred); x = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2; } @@ -1124,11 +1126,11 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, bool opt_first_step = (1.0 - sigma < 1e-6); - auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1)); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); if (sigma_to == 0.0f) { // Euler method (final step, no noise) @@ -1153,8 +1155,8 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1 // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio)) // = (x*1)+(denoised*0) = x - // so D_i = model(u, sigma_s, i + 1, nullptr) - // = model(x, sigma, i + 1, nullptr) + // so D_i = model(u, sigma_s, i + 1) + // = model(x, sigma, i + 1) // = denoised D_i = denoised; @@ -1187,11 +1189,11 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, float sigma_s_i_ratio = sigma_s / sigma; sd::Tensor u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio)); - auto denoised2_opt = model(u, sigma_s, i + 1, nullptr); - if (denoised2_opt.empty()) { + auto denoised2_opt = model(u, sigma_s, i + 1); + if (denoised2_opt.pred.empty()) { return {}; } - D_i = std::move(denoised2_opt); + D_i = std::move(denoised2_opt.pred); } float sigma_down_i_ratio = sigma_down / sigma; @@ -1214,11 +1216,11 @@ static sd::Tensor sample_dpmpp_2m(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); float t = t_fn(sigmas[i]); float t_next = t_fn(sigmas[i + 1]); float h = t_next - t; @@ -1246,11 +1248,11 @@ static sd::Tensor sample_dpmpp_2m_v2(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); float t = t_fn(sigmas[i]); float t_next = t_fn(sigmas[i + 1]); float h = t_next - t; @@ -1354,11 +1356,11 @@ static sd::Tensor sample_lcm(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], i + 1); + if (denoised_opt.pred.empty()) { return {}; } - x = std::move(denoised_opt); + x = std::move(denoised_opt.pred); if (sigmas[i + 1] > 0) { if (is_flow_denoiser) { x *= (1 - sigmas[i + 1]); @@ -1400,11 +1402,11 @@ static sd::Tensor sample_ipndm(denoise_cb_t model, float sigma = sigmas[i]; float sigma_next = sigmas[i + 1]; - auto denoised_opt = model(x, sigma, i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d_cur = (x - denoised) / sigma; int order = std::min(max_order, i + 1); @@ -1444,11 +1446,11 @@ static sd::Tensor sample_ipndm_v(denoise_cb_t model, float sigma = sigmas[i]; float t_next = sigmas[i + 1]; - auto denoised_opt = model(x, sigma, i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d_cur = (x - denoised) / sigma; int order = std::min(max_order, i + 1); @@ -1506,11 +1508,11 @@ static sd::Tensor sample_res_multistep(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigmas[i], i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); float sigma_from = sigmas[i]; float sigma_to = sigmas[i + 1]; @@ -1583,11 +1585,11 @@ static sd::Tensor sample_res_2s(denoise_cb_t model, float sigma_from = sigmas[i]; float sigma_to = sigmas[i + 1]; - auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma_from, -(i + 1)); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser); @@ -1609,11 +1611,11 @@ static sd::Tensor sample_res_2s(denoise_cb_t model, sd::Tensor eps1 = denoised - x0; sd::Tensor x2 = x0 + eps1 * (h * a21); - auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr); - if (denoised2_opt.empty()) { + auto denoised2_opt = model(x2, sigma_c2, i + 1); + if (denoised2_opt.pred.empty()) { return {}; } - sd::Tensor denoised2 = std::move(denoised2_opt); + sd::Tensor denoised2 = std::move(denoised2_opt.pred); sd::Tensor eps2 = denoised2 - x0; x = x0 + h * (b1 * eps1 + b2 * eps2); } @@ -1686,10 +1688,11 @@ static sd::Tensor sample_er_sde(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - sd::Tensor denoised = model(x, sigmas[i], i + 1, nullptr); - if (denoised.empty()) { + auto denoised_opt = model(x, sigmas[i], i + 1); + if (denoised_opt.pred.empty()) { return {}; } + sd::Tensor denoised = std::move(denoised_opt.pred); int stage_used = std::min(max_stage, i + 1); @@ -1804,11 +1807,11 @@ static sd::Tensor sample_tcd(denoise_cb_t model, int timestep_s = (int)floor((1 - eta) * prev_timestep); float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1, nullptr); - if (denoised_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor denoised = std::move(denoised_opt.pred); sd::Tensor d = (x - denoised) / sigma; float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); @@ -1833,16 +1836,15 @@ static sd::Tensor sample_euler_cfg_pp(denoise_cb_t model, const std::vector& sigmas) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - sd::Tensor uncond_denoised; - - auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised); - if (denoised_opt.empty() || uncond_denoised.empty()) { + float sigma = sigmas[i]; + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); - sd::Tensor d = (x - uncond_denoised) / sigma; + sd::Tensor denoised = std::move(denoised_opt.pred); + sd::Tensor uncond_denoised = std::move(denoised_opt.pred_uncond); + sd::Tensor d = (x - uncond_denoised) / sigma; x = denoised + d * sigmas[i + 1]; } @@ -1856,16 +1858,15 @@ static sd::Tensor sample_euler_ancestral_cfg_pp(denoise_cb_t model, float eta) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - sd::Tensor uncond_denoised; - - auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised); - if (denoised_opt.empty() || uncond_denoised.empty()) { + float sigma = sigmas[i]; + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) { return {}; } - sd::Tensor denoised = std::move(denoised_opt); - sd::Tensor d = (x - uncond_denoised) / sigma; + sd::Tensor denoised = std::move(denoised_opt.pred); + sd::Tensor uncond_denoised = std::move(denoised_opt.pred_uncond); + sd::Tensor d = (x - uncond_denoised) / sigma; auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); diff --git a/src/guidance.cpp b/src/guidance.cpp new file mode 100644 index 000000000..f2985ec7a --- /dev/null +++ b/src/guidance.cpp @@ -0,0 +1,89 @@ +#include "guidance.h" + +#include + +namespace sd::guidance { + + static bool has_tensor(const sd::Tensor* tensor) { + return tensor != nullptr && !tensor->empty(); + } + + ClassifierFreeGuidance::ClassifierFreeGuidance(float guidance_scale, + float image_guidance_scale) + : guidance_scale_(guidance_scale), + image_guidance_scale_(image_guidance_scale) { + } + + GuiderOutput ClassifierFreeGuidance::forward(const GuidanceInput& input, + GuiderOutput previous) const { + (void)previous; + + GuiderOutput output; + if (!has_tensor(input.pred_cond)) { + return output; + } + + const sd::Tensor& pred_cond = *input.pred_cond; + output.pred = pred_cond; + if (has_tensor(input.pred_uncond)) { + const sd::Tensor& pred_uncond = *input.pred_uncond; + if (has_tensor(input.pred_img_cond)) { + const sd::Tensor& pred_img_cond = *input.pred_img_cond; + output.pred = pred_uncond + + image_guidance_scale_ * (pred_img_cond - pred_uncond) + + guidance_scale_ * (pred_cond - pred_img_cond); + } else { + output.pred = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond); + } + } else if (has_tensor(input.pred_img_cond)) { + const sd::Tensor& pred_img_cond = *input.pred_img_cond; + output.pred = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond); + } + + return output; + } + + SkipLayerGuidance::SkipLayerGuidance(std::vector layers, + float scale, + float start, + float stop) + : layers_(std::move(layers)), + scale_(scale), + start_(start), + stop_(stop) { + } + + bool SkipLayerGuidance::is_enabled_for_step(const GuidanceInput& input) const { + if (scale_ == 0.0f || layers_.empty() || input.schedule_size == 0) { + return false; + } + + int start_step = static_cast(start_ * static_cast(input.schedule_size)); + int stop_step = static_cast(stop_ * static_cast(input.schedule_size)); + return input.step > start_step && input.step < stop_step; + } + + const std::vector& SkipLayerGuidance::layers() const { + return layers_; + } + + GuiderOutput SkipLayerGuidance::forward(const GuidanceInput& input, + GuiderOutput output) const { + if (!is_enabled_for_step(input) || !input.predict_skip_layer) { + return output; + } + + if (output.pred.empty() || !has_tensor(input.pred_cond)) { + return GuiderOutput(); + } + + output.pred_skip_layer = input.predict_skip_layer(); + if (output.pred_skip_layer.empty()) { + return GuiderOutput(); + } + + output.pred += (*input.pred_cond - output.pred_skip_layer) * scale_; + return output; + } + +} // namespace sd::guidance diff --git a/src/guidance.h b/src/guidance.h new file mode 100644 index 000000000..83d18b2d9 --- /dev/null +++ b/src/guidance.h @@ -0,0 +1,70 @@ +#ifndef __SD_GUIDANCE_H__ +#define __SD_GUIDANCE_H__ + +#include +#include +#include + +#include "tensor.hpp" + +namespace sd::guidance { + + struct GuiderOutput { + sd::Tensor pred; + sd::Tensor pred_cond; + sd::Tensor pred_uncond; + sd::Tensor pred_img_cond; + sd::Tensor pred_skip_layer; + }; + + struct GuidanceInput { + int step = 0; + size_t schedule_size = 0; + const sd::Tensor* pred_cond = nullptr; + const sd::Tensor* pred_uncond = nullptr; + const sd::Tensor* pred_img_cond = nullptr; + + std::function()> predict_skip_layer; + }; + + class BaseGuidance { + public: + virtual ~BaseGuidance() = default; + virtual GuiderOutput forward(const GuidanceInput& input, + GuiderOutput previous) const = 0; + }; + + class ClassifierFreeGuidance : public BaseGuidance { + float guidance_scale_ = 1.0f; + float image_guidance_scale_ = 1.0f; + + public: + ClassifierFreeGuidance(float guidance_scale, + float image_guidance_scale); + + GuiderOutput forward(const GuidanceInput& input, + GuiderOutput previous) const override; + }; + + class SkipLayerGuidance : public BaseGuidance { + std::vector layers_; + float scale_ = 0.0f; + float start_ = 0.0f; + float stop_ = 1.0f; + + public: + SkipLayerGuidance(std::vector layers, + float scale, + float start, + float stop); + + bool is_enabled_for_step(const GuidanceInput& input) const; + const std::vector& layers() const; + + GuiderOutput forward(const GuidanceInput& input, + GuiderOutput previous) const override; + }; + +} // namespace sd::guidance + +#endif // __SD_GUIDANCE_H__ diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 5b92cefa4..71b7d39b9 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -14,6 +14,7 @@ #include "denoiser.hpp" #include "diffusion_model.hpp" #include "esrgan.hpp" +#include "guidance.h" #include "lora.hpp" #include "ltx_audio_vae.h" #include "ltx_vae.hpp" @@ -1854,8 +1855,9 @@ class StableDiffusionGGML { denoiser.get(), sigmas); + bool needs_uncond_denoised = method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD; // Spectrum cache is not supported for CFG++ samplers - if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) { + if (needs_uncond_denoised) { if (cache_runtime.spectrum_enabled) { LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers"); cache_runtime.spectrum_enabled = false; @@ -1868,6 +1870,11 @@ class StableDiffusionGGML { has_skiplayer = false; LOG_WARN("SLG is incompatible with this model type"); } + sd::guidance::ClassifierFreeGuidance classifier_free_guidance(cfg_scale, img_cfg_scale); + sd::guidance::SkipLayerGuidance skip_layer_guidance(has_skiplayer ? skip_layers : std::vector(), + has_skiplayer ? slg_scale : 0.0f, + guidance.slg.layer_start, + guidance.slg.layer_end); if (version == VERSION_HIDREAM_O1 && !noise.empty()) { noise *= eta; @@ -1880,7 +1887,7 @@ class StableDiffusionGGML { sd::Tensor denoised = x_t; SamplePreviewContext preview = prepare_sample_preview_context(); - auto denoise = [&](const sd::Tensor& x, float sigma, int step, sd::Tensor* out_uncond_denoised = nullptr) -> sd::Tensor { + auto denoise = [&](const sd::Tensor& x, float sigma, int step) -> sd::guidance::GuiderOutput { if (step == 1 || step == -1) { pretty_progress(0, (int)steps, 0); } @@ -1913,17 +1920,17 @@ class StableDiffusionGGML { } if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) { - if (out_uncond_denoised == nullptr) { - cache_runtime.spectrum.predict(&denoised); - if (!denoise_mask.empty()) { - denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask); - } - if (sd_should_preview_denoised() && preview.callback != nullptr) { - preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); - } - report_sample_progress(step, steps, t0); - return denoised; + cache_runtime.spectrum.predict(&denoised); + if (!denoise_mask.empty()) { + denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask); + } + if (sd_should_preview_denoised() && preview.callback != nullptr) { + preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); } + report_sample_progress(step, steps, t0); + sd::guidance::GuiderOutput output; + output.pred = denoised; + return output; } if (sd_should_preview_noisy() && preview.callback != nullptr) { @@ -1933,7 +1940,6 @@ class StableDiffusionGGML { sd::Tensor cond_out; sd::Tensor uncond_out; sd::Tensor img_cond_out; - sd::Tensor skip_cond_out; sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma); std::vector> controls; DiffusionParams diffusion_params; @@ -2023,42 +2029,40 @@ class StableDiffusionGGML { return {}; } } - bool is_skiplayer_step = has_skiplayer && - step > (int)(guidance.slg.layer_start * static_cast(sigmas.size())) && - step < (int)(guidance.slg.layer_end * static_cast(sigmas.size())); - if (is_skiplayer_step) { + sd::guidance::GuidanceInput guidance_input; + guidance_input.step = step; + guidance_input.schedule_size = sigmas.size(); + guidance_input.pred_cond = &cond_out; + guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out; + guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out; + + sd::guidance::GuiderOutput guided = classifier_free_guidance.forward(guidance_input, {}); + if (guided.pred.empty()) { + return {}; + } + + if (skip_layer_guidance.is_enabled_for_step(guidance_input)) { LOG_DEBUG("Skipping layers at step %d\n", step); if (!step_cache.is_step_skipped()) { - skip_cond_out = run_condition(cond, - cond.c_concat.empty() ? nullptr : &cond.c_concat, - &skip_layers); - if (skip_cond_out.empty()) { - return {}; - } + guidance_input.predict_skip_layer = [&]() -> sd::Tensor { + return run_condition(cond, + cond.c_concat.empty() ? nullptr : &cond.c_concat, + &skip_layer_guidance.layers()); + }; } } - GGML_ASSERT(!cond_out.empty()); - sd::Tensor latent_result = cond_out; - if (!uncond_out.empty()) { - if (!img_cond_out.empty()) { - latent_result = uncond_out + - img_cfg_scale * (img_cond_out - uncond_out) + - cfg_scale * (cond_out - img_cond_out); - } else { - latent_result = uncond_out + cfg_scale * (cond_out - uncond_out); - } - } else if (!img_cond_out.empty()) { - latent_result = img_cond_out + cfg_scale * (cond_out - img_cond_out); + guided = skip_layer_guidance.forward(guidance_input, std::move(guided)); + if (guided.pred.empty()) { + return {}; } - if (is_skiplayer_step && !skip_cond_out.empty()) { - latent_result += (cond_out - skip_cond_out) * slg_scale; - } - denoised = latent_result * c_out + x * c_skip; - if (out_uncond_denoised != nullptr) { - sd::Tensor base_uncond = !uncond_out.empty() ? uncond_out : cond_out; - *out_uncond_denoised = base_uncond * c_out + x * c_skip; + denoised = guided.pred * c_out + x * c_skip; + sd::guidance::GuiderOutput output; + output.pred = denoised; + if (needs_uncond_denoised) { + const sd::Tensor& base_uncond = !uncond_out.empty() ? uncond_out : cond_out; + output.pred_uncond = base_uncond * c_out + x * c_skip; } if (cache_runtime.spectrum_enabled) { cache_runtime.spectrum.update(denoised); @@ -2070,7 +2074,8 @@ class StableDiffusionGGML { preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); } report_sample_progress(step, steps, t0); - return denoised; + output.pred = denoised; + return output; }; auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args);