From 516441b4127bdf06a8cab0b47a26f5466165143e Mon Sep 17 00:00:00 2001
From: grauho <grauho@proton.me>
Date: Fri, 26 Apr 2024 17:32:09 -0400
Subject: [PATCH 1/4] Added NVIDIA's new "Align Your Steps" style scheduler in
 accordance with their quick start guide. Currently has handling for SD1.5,
 SDXL, and SVD, using the noise levels from their paper to generate the sigma
 values. Can be selected using the --schedule ays command line switch. Updates
 the main.cpp help message and README to reflect this option; they now also
 document the --color switch.

---
 README.md             |   3 +-
 denoiser.hpp          | 146 +++++++++++++++++++++++++++++++++++++++++-
 examples/cli/main.cpp |   4 +-
 stable-diffusion.cpp  |   5 ++
 stable-diffusion.h    |   1 +
 5 files changed, 155 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index b5920e63..8f4a5f3b 100644
--- a/README.md
+++ b/README.md
@@ -190,12 +190,13 @@ arguments:
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate.
-  --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)
+  --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)
   --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
                                      <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
   --control-net-cpu                  keep controlnet in cpu (for low vram)
   --canny                            apply canny preprocessor (edge detection)
+  --color                            colors the logging tags according to level
   -v, --verbose                      print extra info
 ```
 
diff --git a/denoiser.hpp b/denoiser.hpp
index fd934540..5e06a4dc 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -13,6 +13,7 @@ struct SigmaSchedule {
     float alphas_cumprod[TIMESTEPS];
     float sigmas[TIMESTEPS];
     float log_sigmas[TIMESTEPS];
+    int version = 0;
 
     virtual std::vector<float> get_sigmas(uint32_t n) = 0;
 
@@ -75,6 +76,147 @@ struct DiscreteSchedule : SigmaSchedule {
     }
 };
 
+/*
+https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
+*/
+struct AYSSchedule : SigmaSchedule {
+    /* interp and linearInterp adapted from dpilger26's NumCpp library:
+     * https://github.com/dpilger26/NumCpp/tree/5e40aab74d14e257d65d3dc385c9ff9e2120c60e */
+    constexpr double interp(double left, double right, double perc) noexcept {
+        return (left * (1. - perc)) + (right * perc);
+    }
+
+    /* This will make the assumption that the reference x and y values are 
+     * already sorted in ascending order because they are being generated as 
+     * such in the calling function */
+    std::vector<double> linearInterp(std::vector<float> new_x, 
+        const std::vector<float> ref_x, const std::vector<float> ref_y)
+    {
+        const size_t len_x = new_x.size();
+        size_t i = 0;
+        size_t j = 0;
+        std::vector<double> new_y(len_x);
+
+        if (ref_x.size() != ref_y.size()) { 
+            LOG_ERROR("Linear Interoplation Failed: length mismatch");
+            return new_y;    
+        }
+
+        /* serves as the bounds checking for the below while loop */
+        if ((new_x[0] < ref_x[0]) 
+        || (new_x[new_x.size() - 1] > ref_x[ref_x.size() - 1])) {
+            LOG_ERROR("Linear Interpolation Failed: bad bounds");
+            return new_y;
+        }
+
+        while (i < len_x) {
+            if ((ref_x[j] > new_x[i]) || (new_x[i] > ref_x[j + 1])) {
+                j++;
+                continue;
+            }
+
+            const double perc = static_cast<double>(new_x[i] - ref_x[j])
+                / static_cast<double>(ref_x[j + 1] - ref_x[j]);
+
+            new_y[i] = interp(ref_y[j], ref_y[j + 1], perc);
+            i++;
+        }
+
+        return new_y;
+    }
+
+    std::vector<float> linearSpace(const float start, const float end, 
+        const size_t num_points) {
+        std::vector<float> result(num_points);
+        const float inc = (end - start) / (static_cast<float>(num_points - 1));
+
+        if (num_points > 0) {
+            result[0] = start;
+
+            for (size_t i = 1; i < num_points; i++) {
+                result[i] = result[i - 1] + inc;
+            }
+        }
+
+        return result;
+    }
+
+    std::vector<float> logLinearInterpolation(std::vector<float> sigma_in, 
+        const size_t new_len) {
+        const size_t s_len        = sigma_in.size();
+        std::vector<float> x_vals = linearSpace(0.f, 1.f, s_len);
+        std::vector<float> y_vals(s_len);
+
+        /* Reverses the input array to be ascending instead of descending,
+         * also hits it with a log, it is log-linear interpolation after all */
+        for (size_t i = 0; i < s_len; i++) {
+            y_vals[i] = std::log(sigma_in[s_len - i - 1]);
+        }
+
+        std::vector<float>  new_x_vals = linearSpace(0.f, 1.f, new_len);
+        std::vector<double> new_y_vals = linearInterp(new_x_vals, x_vals, y_vals);
+        std::vector<float> results(new_len);
+
+        for (size_t i = 0; i < new_len; i++) {
+            results[i] = static_cast<float>(std::exp(new_y_vals[new_len - i - 1]));
+        }
+
+        return results;
+    }
+
+
+    std::vector<float> get_sigmas(uint32_t len) {
+        const std::vector<float> noise_levels[] = {
+            /* SD1.5 */
+            {14.6146412293, 6.4745760956,  3.8636745985,  2.6946151520, 
+            1.8841921177,   1.3943805092,  0.9642583904,  0.6523686016, 
+            0.3977456272,   0.1515232662,  0.0291671582},
+            /* SDXL */
+            {14.6146412293, 6.3184485287,  3.7681790315,  2.1811480769, 
+            1.3405244945,   0.8620721141,  0.5550693289,  0.3798540708, 
+            0.2332364134,   0.1114188177,  0.0291671582},
+            /* SVD */
+            {700.00, 54.5, 15.886, 7.977, 4.248, 1.789, 0.981, 0.403, 
+            0.173, 0.034, 0.002},
+        };
+    
+        /* Hard coded to SDXL while testing */
+        std::vector<float> inputs;
+        std::vector<float> results(len + 1);
+    
+        switch (version) {
+            case VERSION_2_x: /* fallthrough */
+	        LOG_WARN("AYS not designed for SD2.X models");
+            case VERSION_1_x: 
+	    	LOG_INFO("AYS using SD1.5 noise levels");
+                inputs = noise_levels[0];
+                break;
+            case VERSION_XL:
+	    	LOG_INFO("AYS using SDXL noise levels");
+                inputs = noise_levels[1];
+                break;
+            case VERSION_SVD:
+	    	LOG_INFO("AYS using SVD noise levels");
+                inputs = noise_levels[2];
+                break;
+            default:
+                LOG_ERROR("Version not compatable with AYS scheduler");
+                return results;
+        }
+            
+        /* Stretches those pre-calculated reference levels out to the desired
+         * size using log-linear interpolation */
+        if ((len + 1) != inputs.size()) {
+            results = logLinearInterpolation(inputs, len + 1);
+        }
+    
+        /* Not sure if this is strictly neccessary */
+        results[len] = 0.0f;
+    
+        return results;
+    }
+};
+
 struct KarrasSchedule : SigmaSchedule {
     std::vector<float> get_sigmas(uint32_t n) {
         // These *COULD* be function arguments here,
@@ -87,7 +229,7 @@ struct KarrasSchedule : SigmaSchedule {
 
         float min_inv_rho = pow(sigma_min, (1.f / rho));
         float max_inv_rho = pow(sigma_max, (1.f / rho));
-        for (uint32_t i = 0; i < n; i++) {
+        for (uint32_t i = 0; i < n; i++) { 
             // Eq. (5) from Karras et al 2022
             result[i] = pow(max_inv_rho + (float)i / ((float)n - 1.f) * (min_inv_rho - max_inv_rho), rho);
         }
@@ -122,4 +264,4 @@ struct CompVisVDenoiser : public Denoiser {
     }
 };
 
-#endif  // __DENOISER_HPP__
\ No newline at end of file
+#endif  // __DENOISER_HPP__
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 0f26644b..565af74a 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -43,6 +43,7 @@ const char* schedule_str[] = {
     "default",
     "discrete",
     "karras",
+    "ays",
 };
 
 const char* modes_str[] = {
@@ -190,12 +191,13 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --rng {std_default, cuda}          RNG (default: cuda)\n");
     printf("  -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)\n");
     printf("  -b, --batch-count COUNT            number of images to generate.\n");
-    printf("  --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)\n");
+    printf("  --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)\n");
     printf("  --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
     printf("                                     <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
     printf("  --vae-tiling                       process vae in tiles to reduce memory usage\n");
     printf("  --control-net-cpu                  keep controlnet in cpu (for low vram)\n");
     printf("  --canny                            apply canny preprocessor (edge detection)\n");
+    printf("  --color                            Colors the logging tags according to level\n");
     printf("  -v, --verbose                      print extra info\n");
 }
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index abaae693..09eb8f45 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -450,6 +450,11 @@ class StableDiffusionGGML {
                     LOG_INFO("running with Karras schedule");
                     denoiser->schedule = std::make_shared<KarrasSchedule>();
                     break;
+		case AYS:
+		    LOG_INFO("Running with Align-Your-Steps schedule");
+		    denoiser->schedule = std::make_shared<AYSSchedule>();
+		    denoiser->schedule->version = version;
+		    break;
                 case DEFAULT:
                     // Don't touch anything.
                     break;
diff --git a/stable-diffusion.h b/stable-diffusion.h
index 0de17ae2..4031a093 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -49,6 +49,7 @@ enum schedule_t {
     DEFAULT,
     DISCRETE,
     KARRAS,
+    AYS,
     N_SCHEDULES
 };
 

From 078ed5a607181ac8eee20f9df22f2c495ac1f6bb Mon Sep 17 00:00:00 2001
From: grauho <grauho@proton.me>
Date: Fri, 26 Apr 2024 18:12:24 -0400
Subject: [PATCH 2/4] Removed an old, no-longer-relevant comment

---
 denoiser.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/denoiser.hpp b/denoiser.hpp
index 5e06a4dc..37890515 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -180,7 +180,6 @@ struct AYSSchedule : SigmaSchedule {
             0.173, 0.034, 0.002},
         };
     
-        /* Hard coded to SDXL while testing */
         std::vector<float> inputs;
         std::vector<float> results(len + 1);
     

From 36abf474eaa20e3b1b95381b8bedce8a0d2b55dd Mon Sep 17 00:00:00 2001
From: grauho <grauho@proton.me>
Date: Sat, 27 Apr 2024 10:21:53 -0400
Subject: [PATCH 3/4] Fixed an oversight when steps is equal to the length of
 the initial noise values

---
 denoiser.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/denoiser.hpp b/denoiser.hpp
index 37890515..49f37d50 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -207,7 +207,9 @@ struct AYSSchedule : SigmaSchedule {
          * size using log-linear interpolation */
         if ((len + 1) != inputs.size()) {
             results = logLinearInterpolation(inputs, len + 1);
-        }
+        } else {
+	    results = inputs; 
+	}
     
         /* Not sure if this is strictly neccessary */
         results[len] = 0.0f;

From 37036d9acc7fc1f324721694350a2d9ce9a1d033 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 29 Apr 2024 23:18:07 +0800
Subject: [PATCH 4/4] format code and avoid some warnings

---
 denoiser.hpp         | 84 +++++++++++++++++++++-----------------------
 model.cpp            |  1 +
 stable-diffusion.cpp | 10 +++---
 3 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/denoiser.hpp b/denoiser.hpp
index 49f37d50..255167c2 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -80,31 +80,30 @@ struct DiscreteSchedule : SigmaSchedule {
 https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
 */
 struct AYSSchedule : SigmaSchedule {
-    /* interp and linearInterp adapted from dpilger26's NumCpp library:
+    /* interp and linear_interp adapted from dpilger26's NumCpp library:
      * https://github.com/dpilger26/NumCpp/tree/5e40aab74d14e257d65d3dc385c9ff9e2120c60e */
     constexpr double interp(double left, double right, double perc) noexcept {
         return (left * (1. - perc)) + (right * perc);
     }
 
-    /* This will make the assumption that the reference x and y values are 
-     * already sorted in ascending order because they are being generated as 
+    /* This will make the assumption that the reference x and y values are
+     * already sorted in ascending order because they are being generated as
      * such in the calling function */
-    std::vector<double> linearInterp(std::vector<float> new_x, 
-        const std::vector<float> ref_x, const std::vector<float> ref_y)
-    {
+    std::vector<double> linear_interp(std::vector<float> new_x,
+                                      const std::vector<float> ref_x,
+                                      const std::vector<float> ref_y) {
         const size_t len_x = new_x.size();
-        size_t i = 0;
-        size_t j = 0;
+        size_t i           = 0;
+        size_t j           = 0;
         std::vector<double> new_y(len_x);
 
-        if (ref_x.size() != ref_y.size()) { 
+        if (ref_x.size() != ref_y.size()) {
             LOG_ERROR("Linear Interoplation Failed: length mismatch");
-            return new_y;    
+            return new_y;
         }
 
         /* serves as the bounds checking for the below while loop */
-        if ((new_x[0] < ref_x[0]) 
-        || (new_x[new_x.size() - 1] > ref_x[ref_x.size() - 1])) {
+        if ((new_x[0] < ref_x[0]) || (new_x[new_x.size() - 1] > ref_x[ref_x.size() - 1])) {
             LOG_ERROR("Linear Interpolation Failed: bad bounds");
             return new_y;
         }
@@ -115,8 +114,7 @@ struct AYSSchedule : SigmaSchedule {
                 continue;
             }
 
-            const double perc = static_cast<double>(new_x[i] - ref_x[j])
-                / static_cast<double>(ref_x[j + 1] - ref_x[j]);
+            const double perc = static_cast<double>(new_x[i] - ref_x[j]) / static_cast<double>(ref_x[j + 1] - ref_x[j]);
 
             new_y[i] = interp(ref_y[j], ref_y[j + 1], perc);
             i++;
@@ -125,8 +123,7 @@ struct AYSSchedule : SigmaSchedule {
         return new_y;
     }
 
-    std::vector<float> linearSpace(const float start, const float end, 
-        const size_t num_points) {
+    std::vector<float> linear_space(const float start, const float end, const size_t num_points) {
         std::vector<float> result(num_points);
         const float inc = (end - start) / (static_cast<float>(num_points - 1));
 
@@ -141,10 +138,10 @@ struct AYSSchedule : SigmaSchedule {
         return result;
     }
 
-    std::vector<float> logLinearInterpolation(std::vector<float> sigma_in, 
-        const size_t new_len) {
+    std::vector<float> log_linear_interpolation(std::vector<float> sigma_in,
+                                                const size_t new_len) {
         const size_t s_len        = sigma_in.size();
-        std::vector<float> x_vals = linearSpace(0.f, 1.f, s_len);
+        std::vector<float> x_vals = linear_space(0.f, 1.f, s_len);
         std::vector<float> y_vals(s_len);
 
         /* Reverses the input array to be ascending instead of descending,
@@ -153,8 +150,8 @@ struct AYSSchedule : SigmaSchedule {
             y_vals[i] = std::log(sigma_in[s_len - i - 1]);
         }
 
-        std::vector<float>  new_x_vals = linearSpace(0.f, 1.f, new_len);
-        std::vector<double> new_y_vals = linearInterp(new_x_vals, x_vals, y_vals);
+        std::vector<float> new_x_vals  = linear_space(0.f, 1.f, new_len);
+        std::vector<double> new_y_vals = linear_interp(new_x_vals, x_vals, y_vals);
         std::vector<float> results(new_len);
 
         for (size_t i = 0; i < new_len; i++) {
@@ -164,56 +161,55 @@ struct AYSSchedule : SigmaSchedule {
         return results;
     }
 
-
     std::vector<float> get_sigmas(uint32_t len) {
         const std::vector<float> noise_levels[] = {
             /* SD1.5 */
-            {14.6146412293, 6.4745760956,  3.8636745985,  2.6946151520, 
-            1.8841921177,   1.3943805092,  0.9642583904,  0.6523686016, 
-            0.3977456272,   0.1515232662,  0.0291671582},
+            {14.6146412293f, 6.4745760956f, 3.8636745985f, 2.6946151520f,
+             1.8841921177f, 1.3943805092f, 0.9642583904f, 0.6523686016f,
+             0.3977456272f, 0.1515232662f, 0.0291671582f},
             /* SDXL */
-            {14.6146412293, 6.3184485287,  3.7681790315,  2.1811480769, 
-            1.3405244945,   0.8620721141,  0.5550693289,  0.3798540708, 
-            0.2332364134,   0.1114188177,  0.0291671582},
+            {14.6146412293f, 6.3184485287f, 3.7681790315f, 2.1811480769f,
+             1.3405244945f, 0.8620721141f, 0.5550693289f, 0.3798540708f,
+             0.2332364134f, 0.1114188177f, 0.0291671582f},
             /* SVD */
-            {700.00, 54.5, 15.886, 7.977, 4.248, 1.789, 0.981, 0.403, 
-            0.173, 0.034, 0.002},
+            {700.00f, 54.5f, 15.886f, 7.977f, 4.248f, 1.789f, 0.981f, 0.403f,
+             0.173f, 0.034f, 0.002f},
         };
-    
+
         std::vector<float> inputs;
         std::vector<float> results(len + 1);
-    
+
         switch (version) {
             case VERSION_2_x: /* fallthrough */
-	        LOG_WARN("AYS not designed for SD2.X models");
-            case VERSION_1_x: 
-	    	LOG_INFO("AYS using SD1.5 noise levels");
+                LOG_WARN("AYS not designed for SD2.X models");
+            case VERSION_1_x:
+                LOG_INFO("AYS using SD1.5 noise levels");
                 inputs = noise_levels[0];
                 break;
             case VERSION_XL:
-	    	LOG_INFO("AYS using SDXL noise levels");
+                LOG_INFO("AYS using SDXL noise levels");
                 inputs = noise_levels[1];
                 break;
             case VERSION_SVD:
-	    	LOG_INFO("AYS using SVD noise levels");
+                LOG_INFO("AYS using SVD noise levels");
                 inputs = noise_levels[2];
                 break;
             default:
                 LOG_ERROR("Version not compatable with AYS scheduler");
                 return results;
         }
-            
+
         /* Stretches those pre-calculated reference levels out to the desired
          * size using log-linear interpolation */
         if ((len + 1) != inputs.size()) {
-            results = logLinearInterpolation(inputs, len + 1);
+            results = log_linear_interpolation(inputs, len + 1);
         } else {
-	    results = inputs; 
-	}
-    
+            results = inputs;
+        }
+
         /* Not sure if this is strictly neccessary */
         results[len] = 0.0f;
-    
+
         return results;
     }
 };
@@ -230,7 +226,7 @@ struct KarrasSchedule : SigmaSchedule {
 
         float min_inv_rho = pow(sigma_min, (1.f / rho));
         float max_inv_rho = pow(sigma_max, (1.f / rho));
-        for (uint32_t i = 0; i < n; i++) { 
+        for (uint32_t i = 0; i < n; i++) {
             // Eq. (5) from Karras et al 2022
             result[i] = pow(max_inv_rho + (float)i / ((float)n - 1.f) * (min_inv_rho - max_inv_rho), rho);
         }
diff --git a/model.cpp b/model.cpp
index 3db919be..684317d2 100644
--- a/model.cpp
+++ b/model.cpp
@@ -890,6 +890,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
 
         // ggml/src/ggml.c:2745
         if (n_dims < 1 || n_dims > GGML_MAX_DIMS) {
+            LOG_ERROR("skip tensor '%s' with n_dims %d", name.c_str(), n_dims);
             continue;
         }
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 09eb8f45..e4eb56e7 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -450,11 +450,11 @@ class StableDiffusionGGML {
                     LOG_INFO("running with Karras schedule");
                     denoiser->schedule = std::make_shared<KarrasSchedule>();
                     break;
-		case AYS:
-		    LOG_INFO("Running with Align-Your-Steps schedule");
-		    denoiser->schedule = std::make_shared<AYSSchedule>();
-		    denoiser->schedule->version = version;
-		    break;
+                case AYS:
+                    LOG_INFO("Running with Align-Your-Steps schedule");
+                    denoiser->schedule          = std::make_shared<AYSSchedule>();
+                    denoiser->schedule->version = version;
+                    break;
                 case DEFAULT:
                     // Don't touch anything.
                     break;