diff --git a/src/denoiser.hpp b/src/denoiser.hpp
index 572481889..c59a9733f 100644
--- a/src/denoiser.hpp
+++ b/src/denoiser.hpp
@@ -4,11 +4,13 @@
 #include <algorithm>
 #include <cctype>
 #include <cmath>
+#include <functional>
 #include <string>
 #include <utility>
 
 #include "ggml_extend.hpp"
 #include "gits_noise.inl"
+#include "guidance.h"
 #include "tensor.hpp"
 
 /*================================================= CompVisDenoiser ==================================================*/
@@ -894,7 +896,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
     }
 };
 
-typedef std::function<sd::Tensor<float>(const sd::Tensor<float>&, float, int, sd::Tensor<float>*)> denoise_cb_t;
+typedef std::function<sd::guidance::GuiderOutput(const sd::Tensor<float>&, float, int)> denoise_cb_t;  // (x, sigma, step) -> guidance result; the samplers below abort when the returned `pred` is empty
 
 static std::pair<float, float> get_ancestral_step(float sigma_from,
                                                   float sigma_to,
@@ -972,11 +974,11 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
     for (int i = 0; i < steps; i++) {
         float sigma       = sigmas[i];
         float sigma_to    = sigmas[i + 1];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         if (sigma_to == 0.f) {
             x = denoised;
         } else if (eta == 0.f) {
@@ -1003,11 +1005,11 @@ static sd::Tensor<float> sample_euler(denoise_cb_t model,
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
         float sigma       = sigmas[i];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         sd::Tensor<float> d        = (x - denoised) / sigma;
         x += d * (sigmas[i + 1] - sigma);
     }
@@ -1019,22 +1021,22 @@ static sd::Tensor<float> sample_heun(denoise_cb_t model,
                                      const std::vector<float>& sigmas) {
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         sd::Tensor<float> d        = (x - denoised) / sigmas[i];
         float dt                   = sigmas[i + 1] - sigmas[i];
         if (sigmas[i + 1] == 0) {
             x += d * dt;
         } else {
             sd::Tensor<float> x2 = x + d * dt;
-            auto denoised2_opt   = model(x2, sigmas[i + 1], i + 1, nullptr);
-            if (denoised2_opt.empty()) {
+            auto denoised2_opt   = model(x2, sigmas[i + 1], i + 1);
+            if (denoised2_opt.pred.empty()) {
                 return {};
             }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
             d                           = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f;
             x += d * dt;
         }
@@ -1047,11 +1049,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
                                      const std::vector<float>& sigmas) {
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         sd::Tensor<float> d        = (x - denoised) / sigmas[i];
         if (sigmas[i + 1] == 0) {
             x += d * (sigmas[i + 1] - sigmas[i]);
@@ -1060,11 +1062,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
             float dt_1           = sigma_mid - sigmas[i];
             float dt_2           = sigmas[i + 1] - sigmas[i];
             sd::Tensor<float> x2 = x + d * dt_1;
-            auto denoised2_opt   = model(x2, sigma_mid, i + 1, nullptr);
-            if (denoised2_opt.empty()) {
+            auto denoised2_opt   = model(x2, sigma_mid, i + 1);
+            if (denoised2_opt.pred.empty()) {
                 return {};
             }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
             x += ((x2 - denoised2) / sigma_mid) * dt_2;
         }
     }
@@ -1081,11 +1083,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised  = std::move(denoised_opt);
+        sd::Tensor<float> denoised  = std::move(denoised_opt.pred);
         auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
 
         if (sigma_down == 0) {
@@ -1097,11 +1099,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
             float s              = t + 0.5f * h;
             float sigma_s        = sigma_fn(s);
             sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
-            auto denoised2_opt   = model(x2, sigma_s, i + 1, nullptr);
-            if (denoised2_opt.empty()) {
+            auto denoised2_opt   = model(x2, sigma_s, i + 1);
+            if (denoised2_opt.pred.empty()) {
                 return {};
             }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
             x                           = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2;
         }
 
@@ -1124,11 +1126,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
 
         bool opt_first_step = (1.0 - sigma < 1e-6);
 
-        auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1));
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         if (sigma_to == 0.0f) {
             // Euler method (final step, no noise)
@@ -1153,8 +1155,8 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                 // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
                 // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
                 //   = (x*1)+(denoised*0) = x
-                // so D_i = model(u, sigma_s, i + 1, nullptr)
-                //        = model(x, sigma,   i + 1, nullptr)
+                // so D_i = model(u, sigma_s, i + 1)
+                //        = model(x, sigma,   i + 1)
                 //        = denoised
                 D_i = denoised;
 
@@ -1187,11 +1189,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                 float sigma_s_i_ratio = sigma_s / sigma;
                 sd::Tensor<float> u   = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
 
-                auto denoised2_opt = model(u, sigma_s, i + 1, nullptr);
-                if (denoised2_opt.empty()) {
+                auto denoised2_opt = model(u, sigma_s, i + 1);
+                if (denoised2_opt.pred.empty()) {
                     return {};
                 }
-                D_i = std::move(denoised2_opt);
+                D_i = std::move(denoised2_opt.pred);
             }
 
             float sigma_down_i_ratio = sigma_down / sigma;
@@ -1214,11 +1216,11 @@ static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         float t                    = t_fn(sigmas[i]);
         float t_next               = t_fn(sigmas[i + 1]);
         float h                    = t_next - t;
@@ -1246,11 +1248,11 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         float t                    = t_fn(sigmas[i]);
         float t_next               = t_fn(sigmas[i + 1]);
         float h                    = t_next - t;
@@ -1354,11 +1356,11 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        x = std::move(denoised_opt);
+        x = std::move(denoised_opt.pred);
         if (sigmas[i + 1] > 0) {
             if (is_flow_denoiser) {
                 x *= (1 - sigmas[i + 1]);
@@ -1400,11 +1402,11 @@ static sd::Tensor<float> sample_ipndm(denoise_cb_t model,
         float sigma      = sigmas[i];
         float sigma_next = sigmas[i + 1];
 
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         sd::Tensor<float> d_cur = (x - denoised) / sigma;
         int order               = std::min(max_order, i + 1);
@@ -1444,11 +1446,11 @@ static sd::Tensor<float> sample_ipndm_v(denoise_cb_t model,
         float sigma  = sigmas[i];
         float t_next = sigmas[i + 1];
 
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         sd::Tensor<float> d_cur = (x - denoised) / sigma;
         int order               = std::min(max_order, i + 1);
@@ -1506,11 +1508,11 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigmas[i], i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         float sigma_from = sigmas[i];
         float sigma_to   = sigmas[i + 1];
@@ -1583,11 +1585,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
         float sigma_from = sigmas[i];
         float sigma_to   = sigmas[i + 1];
 
-        auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma_from, -(i + 1));
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
 
@@ -1609,11 +1611,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
             sd::Tensor<float> eps1 = denoised - x0;
             sd::Tensor<float> x2   = x0 + eps1 * (h * a21);
 
-            auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr);
-            if (denoised2_opt.empty()) {
+            auto denoised2_opt = model(x2, sigma_c2, i + 1);
+            if (denoised2_opt.pred.empty()) {
                 return {};
             }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
             sd::Tensor<float> eps2      = denoised2 - x0;
             x                           = x0 + h * (b1 * eps1 + b2 * eps2);
         }
@@ -1686,10 +1688,11 @@ static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
 
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1, nullptr);
-        if (denoised.empty()) {
+        auto denoised_opt = model(x, sigmas[i], i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
 
         int stage_used = std::min(max_stage, i + 1);
 
@@ -1804,11 +1807,11 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
         int timestep_s    = (int)floor((1 - eta) * prev_timestep);
         float sigma       = sigmas[i];
 
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
-        if (denoised_opt.empty()) {
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty()) {
             return {};
         }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
         sd::Tensor<float> d        = (x - denoised) / sigma;
 
         float alpha_prod_t      = 1.0f / (sigma * sigma + 1.0f);
@@ -1833,16 +1836,15 @@ static sd::Tensor<float> sample_euler_cfg_pp(denoise_cb_t model,
                                              const std::vector<float>& sigmas) {
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        float sigma = sigmas[i];
-        sd::Tensor<float> uncond_denoised;
-
-        auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
-        if (denoised_opt.empty() || uncond_denoised.empty()) {
+        float sigma       = sigmas[i];
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
             return {};
         }
 
-        sd::Tensor<float> denoised = std::move(denoised_opt);
-        sd::Tensor<float> d        = (x - uncond_denoised) / sigma;
+        sd::Tensor<float> denoised        = std::move(denoised_opt.pred);
+        sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
+        sd::Tensor<float> d               = (x - uncond_denoised) / sigma;
 
         x = denoised + d * sigmas[i + 1];
     }
@@ -1856,16 +1858,15 @@ static sd::Tensor<float> sample_euler_ancestral_cfg_pp(denoise_cb_t model,
                                                        float eta) {
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        float sigma = sigmas[i];
-        sd::Tensor<float> uncond_denoised;
-
-        auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
-        if (denoised_opt.empty() || uncond_denoised.empty()) {
+        float sigma       = sigmas[i];
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
             return {};
         }
 
-        sd::Tensor<float> denoised = std::move(denoised_opt);
-        sd::Tensor<float> d        = (x - uncond_denoised) / sigma;
+        sd::Tensor<float> denoised        = std::move(denoised_opt.pred);
+        sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
+        sd::Tensor<float> d               = (x - uncond_denoised) / sigma;
 
         auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
 
diff --git a/src/guidance.cpp b/src/guidance.cpp
new file mode 100644
index 000000000..f2985ec7a
--- /dev/null
+++ b/src/guidance.cpp
@@ -0,0 +1,114 @@
+#include "guidance.h"
+
+#include <utility>
+
+namespace sd::guidance {
+
+    // Returns true when `tensor` is non-null and points at a non-empty tensor.
+    static bool has_tensor(const sd::Tensor<float>* tensor) {
+        return tensor != nullptr && !tensor->empty();
+    }
+
+    // Stores the two scales applied by forward().
+    ClassifierFreeGuidance::ClassifierFreeGuidance(float guidance_scale,
+                                                   float image_guidance_scale)
+        : guidance_scale_(guidance_scale),
+          image_guidance_scale_(image_guidance_scale) {
+    }
+
+    // Combines the predictions referenced by `input` into `pred`:
+    //   uncond + img_cond : uncond + image_scale * (img_cond - uncond) + scale * (cond - img_cond)
+    //   uncond only       : uncond + scale * (cond - uncond)
+    //   img_cond only     : img_cond + scale * (cond - img_cond)
+    //   neither           : cond
+    // Returns an empty GuiderOutput when `input.pred_cond` is null or empty.
+    // `previous` is ignored; only `pred` of the result is populated.
+    GuiderOutput ClassifierFreeGuidance::forward(const GuidanceInput& input,
+                                                 GuiderOutput previous) const {
+        (void)previous;
+
+        GuiderOutput output;
+        if (!has_tensor(input.pred_cond)) {
+            return output;
+        }
+
+        const sd::Tensor<float>& pred_cond = *input.pred_cond;
+        const bool has_uncond              = has_tensor(input.pred_uncond);
+        const bool has_img_cond            = has_tensor(input.pred_img_cond);
+
+        // Each branch assigns `pred` exactly once, so `pred_cond` is copied only
+        // when there is nothing to guide against.
+        if (has_uncond && has_img_cond) {
+            const sd::Tensor<float>& pred_uncond   = *input.pred_uncond;
+            const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
+            output.pred                            = pred_uncond +
+                          image_guidance_scale_ * (pred_img_cond - pred_uncond) +
+                          guidance_scale_ * (pred_cond - pred_img_cond);
+        } else if (has_uncond) {
+            const sd::Tensor<float>& pred_uncond = *input.pred_uncond;
+            output.pred                          = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond);
+        } else if (has_img_cond) {
+            const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
+            output.pred                            = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond);
+        } else {
+            output.pred = pred_cond;
+        }
+
+        return output;
+    }
+
+    // Takes ownership of the layer indices and stores the scale together with
+    // the start/stop fractions used by is_enabled_for_step().
+    SkipLayerGuidance::SkipLayerGuidance(std::vector<int> layers,
+                                         float scale,
+                                         float start,
+                                         float stop)
+        : layers_(std::move(layers)),
+          scale_(scale),
+          start_(start),
+          stop_(stop) {
+    }
+
+    // Returns true when the guidance is configured (non-zero scale, at least one
+    // layer, non-zero schedule size) and `input.step` lies strictly between
+    // start * schedule_size and stop * schedule_size, both truncated to int.
+    bool SkipLayerGuidance::is_enabled_for_step(const GuidanceInput& input) const {
+        if (scale_ == 0.0f || layers_.empty() || input.schedule_size == 0) {
+            return false;
+        }
+
+        int start_step = static_cast<int>(start_ * static_cast<float>(input.schedule_size));
+        int stop_step  = static_cast<int>(stop_ * static_cast<float>(input.schedule_size));
+        return input.step > start_step && input.step < stop_step;
+    }
+
+    // Returns the layer indices supplied at construction.
+    const std::vector<int>& SkipLayerGuidance::layers() const {
+        return layers_;
+    }
+
+    // Adds scale * (pred_cond - pred_skip_layer) to `output.pred`.
+    // `output` is returned unchanged when the guidance is disabled for this step
+    // or `input.predict_skip_layer` is unset. An empty GuiderOutput is returned
+    // when `output.pred` or `input.pred_cond` is missing, or when
+    // `predict_skip_layer` yields an empty tensor.
+    GuiderOutput SkipLayerGuidance::forward(const GuidanceInput& input,
+                                            GuiderOutput output) const {
+        if (!is_enabled_for_step(input) || !input.predict_skip_layer) {
+            return output;
+        }
+
+        if (output.pred.empty() || !has_tensor(input.pred_cond)) {
+            return GuiderOutput();
+        }
+
+        output.pred_skip_layer = input.predict_skip_layer();
+        if (output.pred_skip_layer.empty()) {
+            return GuiderOutput();
+        }
+
+        output.pred += (*input.pred_cond - output.pred_skip_layer) * scale_;
+        return output;
+    }
+
+}  // namespace sd::guidance
diff --git a/src/guidance.h b/src/guidance.h
new file mode 100644
index 000000000..83d18b2d9
--- /dev/null
+++ b/src/guidance.h
@@ -0,0 +1,70 @@
+#ifndef __SD_GUIDANCE_H__
+#define __SD_GUIDANCE_H__
+
+#include <cstddef>
+#include <functional>
+#include <vector>
+
+#include "tensor.hpp"
+
+namespace sd::guidance {
+
+    struct GuiderOutput {  // Result returned by a guider and by the denoise callback.
+        sd::Tensor<float> pred;             // Combined prediction; callers in this change treat an empty `pred` as failure.
+        sd::Tensor<float> pred_cond;        // NOTE(review): no writer in guidance.cpp — confirm the intended producer.
+        sd::Tensor<float> pred_uncond;      // Not written by the guiders in guidance.cpp; the CFG++ samplers require it to be non-empty.
+        sd::Tensor<float> pred_img_cond;    // NOTE(review): no writer in guidance.cpp — confirm the intended producer.
+        sd::Tensor<float> pred_skip_layer;  // Set by SkipLayerGuidance::forward from GuidanceInput::predict_skip_layer().
+    };
+
+    struct GuidanceInput {  // Per-step inputs handed to a guider; the tensor pointers are non-owning.
+        int step                               = 0;        // Step value forwarded from the sampler; some samplers pass negative values.
+        size_t schedule_size                   = 0;        // Schedule length that the start/stop fractions of SkipLayerGuidance are scaled by.
+        const sd::Tensor<float>* pred_cond     = nullptr;  // Conditional prediction; both guiders return an empty output without it.
+        const sd::Tensor<float>* pred_uncond   = nullptr;  // Optional; null or empty means "absent".
+        const sd::Tensor<float>* pred_img_cond = nullptr;  // Optional; null or empty means "absent".
+
+        std::function<sd::Tensor<float>()> predict_skip_layer;  // Optional callback producing the skip-layer prediction; invoked only by SkipLayerGuidance::forward.
+    };
+
+    class BaseGuidance {  // Interface of one guidance stage: maps the step inputs plus the previous stage's output to a new output.
+    public:
+        virtual ~BaseGuidance()                                   = default;
+        virtual GuiderOutput forward(const GuidanceInput& input,
+                                     GuiderOutput previous) const = 0;
+    };
+
+    class ClassifierFreeGuidance : public BaseGuidance {  // Blends the cond / uncond / img_cond predictions using the two scales.
+        float guidance_scale_       = 1.0f;  // Weight on (cond - uncond), or on (cond - img_cond) when img_cond is present.
+        float image_guidance_scale_ = 1.0f;  // Weight on (img_cond - uncond); used only when both uncond and img_cond are present.
+
+    public:
+        ClassifierFreeGuidance(float guidance_scale,
+                               float image_guidance_scale);
+
+        GuiderOutput forward(const GuidanceInput& input,
+                             GuiderOutput previous) const override;  // `previous` is ignored; the result is empty when pred_cond is null or empty.
+    };
+
+    class SkipLayerGuidance : public BaseGuidance {  // Adds scale * (pred_cond - pred_skip_layer) to the previous prediction on selected steps.
+        std::vector<int> layers_;  // Layer indices exposed through layers(); the guidance is disabled when empty.
+        float scale_ = 0.0f;       // Guidance strength; 0 disables the guidance.
+        float start_ = 0.0f;       // Lower bound of the active window as a fraction of schedule_size (exclusive, after truncation to int).
+        float stop_  = 1.0f;       // Upper bound of the active window as a fraction of schedule_size (exclusive, after truncation to int).
+
+    public:
+        SkipLayerGuidance(std::vector<int> layers,
+                          float scale,
+                          float start,
+                          float stop);
+
+        bool is_enabled_for_step(const GuidanceInput& input) const;  // True when configured and start * size < input.step < stop * size.
+        const std::vector<int>& layers() const;                      // Layer indices supplied at construction.
+
+        GuiderOutput forward(const GuidanceInput& input,
+                             GuiderOutput previous) const override;  // Returns `previous` unchanged when disabled for the step or when predict_skip_layer is unset.
+    };
+
+}  // namespace sd::guidance
+
+#endif  // __SD_GUIDANCE_H__
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index 5b92cefa4..71b7d39b9 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -14,6 +14,7 @@
 #include "denoiser.hpp"
 #include "diffusion_model.hpp"
 #include "esrgan.hpp"
+#include "guidance.h"
 #include "lora.hpp"
 #include "ltx_audio_vae.h"
 #include "ltx_vae.hpp"
@@ -1854,8 +1855,9 @@ class StableDiffusionGGML {
                                                                                            denoiser.get(),
                                                                                            sigmas);
 
+        bool needs_uncond_denoised = method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD;
         // Spectrum cache is not supported for CFG++ samplers
-        if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) {
+        if (needs_uncond_denoised) {
             if (cache_runtime.spectrum_enabled) {
                 LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers");
                 cache_runtime.spectrum_enabled = false;
@@ -1868,6 +1870,11 @@ class StableDiffusionGGML {
             has_skiplayer = false;
             LOG_WARN("SLG is incompatible with this model type");
         }
+        sd::guidance::ClassifierFreeGuidance classifier_free_guidance(cfg_scale, img_cfg_scale);
+        sd::guidance::SkipLayerGuidance skip_layer_guidance(has_skiplayer ? skip_layers : std::vector<int>(),
+                                                            has_skiplayer ? slg_scale : 0.0f,
+                                                            guidance.slg.layer_start,
+                                                            guidance.slg.layer_end);
 
         if (version == VERSION_HIDREAM_O1 && !noise.empty()) {
             noise *= eta;
@@ -1880,7 +1887,7 @@ class StableDiffusionGGML {
         sd::Tensor<float> denoised   = x_t;
         SamplePreviewContext preview = prepare_sample_preview_context();
 
-        auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step, sd::Tensor<float>* out_uncond_denoised = nullptr) -> sd::Tensor<float> {
+        auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step) -> sd::guidance::GuiderOutput {
             if (step == 1 || step == -1) {
                 pretty_progress(0, (int)steps, 0);
             }
@@ -1913,17 +1920,17 @@ class StableDiffusionGGML {
             }
 
             if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) {
-                if (out_uncond_denoised == nullptr) {
-                    cache_runtime.spectrum.predict(&denoised);
-                    if (!denoise_mask.empty()) {
-                        denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
-                    }
-                    if (sd_should_preview_denoised() && preview.callback != nullptr) {
-                        preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
-                    }
-                    report_sample_progress(step, steps, t0);
-                    return denoised;
+                cache_runtime.spectrum.predict(&denoised);
+                if (!denoise_mask.empty()) {
+                    denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
+                }
+                if (sd_should_preview_denoised() && preview.callback != nullptr) {
+                    preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
                 }
+                report_sample_progress(step, steps, t0);
+                sd::guidance::GuiderOutput output;
+                output.pred = denoised;
+                return output;
             }
 
             if (sd_should_preview_noisy() && preview.callback != nullptr) {
@@ -1933,7 +1940,6 @@ class StableDiffusionGGML {
             sd::Tensor<float> cond_out;
             sd::Tensor<float> uncond_out;
             sd::Tensor<float> img_cond_out;
-            sd::Tensor<float> skip_cond_out;
             sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma);
             std::vector<sd::Tensor<float>> controls;
             DiffusionParams diffusion_params;
@@ -2023,42 +2029,40 @@ class StableDiffusionGGML {
                     return {};
                 }
             }
-            bool is_skiplayer_step = has_skiplayer &&
-                                     step > (int)(guidance.slg.layer_start * static_cast<int>(sigmas.size())) &&
-                                     step < (int)(guidance.slg.layer_end * static_cast<int>(sigmas.size()));
-            if (is_skiplayer_step) {
+            sd::guidance::GuidanceInput guidance_input;
+            guidance_input.step          = step;
+            guidance_input.schedule_size = sigmas.size();
+            guidance_input.pred_cond     = &cond_out;
+            guidance_input.pred_uncond   = uncond_out.empty() ? nullptr : &uncond_out;
+            guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out;
+
+            sd::guidance::GuiderOutput guided = classifier_free_guidance.forward(guidance_input, {});
+            if (guided.pred.empty()) {
+                return {};
+            }
+
+            if (skip_layer_guidance.is_enabled_for_step(guidance_input)) {
                 LOG_DEBUG("Skipping layers at step %d\n", step);
                 if (!step_cache.is_step_skipped()) {
-                    skip_cond_out = run_condition(cond,
-                                                  cond.c_concat.empty() ? nullptr : &cond.c_concat,
-                                                  &skip_layers);
-                    if (skip_cond_out.empty()) {
-                        return {};
-                    }
+                    guidance_input.predict_skip_layer = [&]() -> sd::Tensor<float> {
+                        return run_condition(cond,
+                                             cond.c_concat.empty() ? nullptr : &cond.c_concat,
+                                             &skip_layer_guidance.layers());
+                    };
                 }
             }
 
-            GGML_ASSERT(!cond_out.empty());
-            sd::Tensor<float> latent_result = cond_out;
-            if (!uncond_out.empty()) {
-                if (!img_cond_out.empty()) {
-                    latent_result = uncond_out +
-                                    img_cfg_scale * (img_cond_out - uncond_out) +
-                                    cfg_scale * (cond_out - img_cond_out);
-                } else {
-                    latent_result = uncond_out + cfg_scale * (cond_out - uncond_out);
-                }
-            } else if (!img_cond_out.empty()) {
-                latent_result = img_cond_out + cfg_scale * (cond_out - img_cond_out);
+            guided = skip_layer_guidance.forward(guidance_input, std::move(guided));
+            if (guided.pred.empty()) {
+                return {};
             }
 
-            if (is_skiplayer_step && !skip_cond_out.empty()) {
-                latent_result += (cond_out - skip_cond_out) * slg_scale;
-            }
-            denoised = latent_result * c_out + x * c_skip;
-            if (out_uncond_denoised != nullptr) {
-                sd::Tensor<float> base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
-                *out_uncond_denoised          = base_uncond * c_out + x * c_skip;
+            denoised = guided.pred * c_out + x * c_skip;
+            sd::guidance::GuiderOutput output;
+            output.pred = denoised;
+            if (needs_uncond_denoised) {
+                const sd::Tensor<float>& base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
+                output.pred_uncond                   = base_uncond * c_out + x * c_skip;
             }
             if (cache_runtime.spectrum_enabled) {
                 cache_runtime.spectrum.update(denoised);
@@ -2070,7 +2074,8 @@ class StableDiffusionGGML {
                 preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
             }
             report_sample_progress(step, steps, t0);
-            return denoised;
+            output.pred = denoised;
+            return output;
         };
 
         auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args);