Skip to content

Commit 48bcce4

Browse files
grauhole and leejet authored
fix: avoid double free and fix sdxl lora naming conversion
* Fixed a double-free issue when running multiple backends on the CPU, e.g. CLIP and the primary backend, as this would result in the *_backend pointers both pointing to the same thing, resulting in a segfault when calling the StableDiffusionGGML destructor. * Improved logging to allow for a color switch on the command-line interface. Changed the base log_printf function to not bake the log level directly into the log buffer, as that information is already passed to the logging function via the level parameter, and it's easier to add it there than to strip it out. * Added a fix for certain SDXL LoRAs that don't seem to follow the expected naming convention; converts the tensor name during LoRA model loading. Added some logging of useful LoRA loading information. Had to increase the base size of the GGML graph, as the existing size results in an insufficient graph memory error when using SDXL LoRAs. * Small fixes --------- Co-authored-by: leejet <[email protected]>
1 parent a469688 commit 48bcce4

7 files changed

+102
-32
lines changed

examples/cli/main.cpp

+38-6
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ struct SDParams {
103103
bool clip_on_cpu = false;
104104
bool vae_on_cpu = false;
105105
bool canny_preprocess = false;
106+
bool color = false;
106107
int upscale_repeats = 1;
107108
};
108109

@@ -469,6 +470,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
469470
exit(0);
470471
} else if (arg == "-v" || arg == "--verbose") {
471472
params.verbose = true;
473+
} else if (arg == "--color") {
474+
params.color = true;
472475
} else {
473476
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
474477
print_usage(argc, argv);
@@ -572,18 +575,47 @@ std::string get_image_params(SDParams params, int64_t seed) {
572575
return parameter_string;
573576
}
574577

578+
/* Enables printing the log-level tag in color using ANSI escape codes */
575579
void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
576580
SDParams* params = (SDParams*)data;
577-
if (!params->verbose && level <= SD_LOG_DEBUG) {
581+
int tag_color;
582+
const char* level_str;
583+
FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout;
584+
585+
if (!log || (!params->verbose && level <= SD_LOG_DEBUG)) {
578586
return;
579587
}
580-
if (level <= SD_LOG_INFO) {
581-
fputs(log, stdout);
582-
fflush(stdout);
588+
589+
switch (level) {
590+
case SD_LOG_DEBUG:
591+
tag_color = 37;
592+
level_str = "DEBUG";
593+
break;
594+
case SD_LOG_INFO:
595+
tag_color = 34;
596+
level_str = "INFO";
597+
break;
598+
case SD_LOG_WARN:
599+
tag_color = 35;
600+
level_str = "WARN";
601+
break;
602+
case SD_LOG_ERROR:
603+
tag_color = 31;
604+
level_str = "ERROR";
605+
break;
606+
default: /* Potential future-proofing */
607+
tag_color = 33;
608+
level_str = "?????";
609+
break;
610+
}
611+
612+
if (params->color == true) {
613+
fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str);
583614
} else {
584-
fputs(log, stderr);
585-
fflush(stderr);
615+
fprintf(out_stream, "[%-5s] ", level_str);
586616
}
617+
fputs(log, out_stream);
618+
fflush(out_stream);
587619
}
588620

589621
int main(int argc, const char* argv[]) {

ggml_extend.hpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -759,8 +759,13 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding(
759759
// virtual struct ggml_cgraph* get_ggml_cgraph() = 0;
760760
// };
761761

762+
/*
762763
#define MAX_PARAMS_TENSOR_NUM 10240
763764
#define MAX_GRAPH_SIZE 10240
765+
*/
766+
/* SDXL with LoRA requires more space */
767+
#define MAX_PARAMS_TENSOR_NUM 15360
768+
#define MAX_GRAPH_SIZE 15360
764769

765770
struct GGMLModule {
766771
protected:
@@ -1308,4 +1313,4 @@ class MultiheadAttention : public GGMLBlock {
13081313
}
13091314
};
13101315

1311-
#endif // __GGML_EXTEND__HPP__
1316+
#endif // __GGML_EXTEND__HPP__

lora.hpp

+20-4
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ struct LoraModel : public GGMLModule {
7575
return true;
7676
}
7777

78-
struct ggml_cgraph* build_graph(std::map<std::string, struct ggml_tensor*> model_tensors) {
78+
struct ggml_cgraph* build_lora_graph(std::map<std::string, struct ggml_tensor*> model_tensors) {
7979
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, LORA_GRAPH_SIZE, false);
8080

8181
std::set<std::string> applied_lora_tensors;
@@ -90,7 +90,7 @@ struct LoraModel : public GGMLModule {
9090
k_tensor = k_tensor.substr(0, k_pos);
9191
replace_all_chars(k_tensor, '.', '_');
9292
// LOG_DEBUG("k_tensor %s", k_tensor.c_str());
93-
if (k_tensor == "model_diffusion_model_output_blocks_2_2_conv") { // fix for SDXL
93+
if (k_tensor == "model_diffusion_model_output_blocks_2_2_conv") { // fix for SDXL
9494
k_tensor = "model_diffusion_model_output_blocks_2_1_conv";
9595
}
9696
std::string lora_up_name = "lora." + k_tensor + ".lora_up.weight";
@@ -155,21 +155,37 @@ struct LoraModel : public GGMLModule {
155155
ggml_build_forward_expand(gf, final_weight);
156156
}
157157

158+
size_t total_lora_tensors_count = 0;
159+
size_t applied_lora_tensors_count = 0;
160+
158161
for (auto& kv : lora_tensors) {
162+
total_lora_tensors_count++;
159163
if (applied_lora_tensors.find(kv.first) == applied_lora_tensors.end()) {
160164
LOG_WARN("unused lora tensor %s", kv.first.c_str());
165+
} else {
166+
applied_lora_tensors_count++;
161167
}
162168
}
169+
/* Don't worry if this message shows up twice in the logs per LoRA,
170+
* this function is called once to calculate the required buffer size
171+
* and then again to actually generate a graph to be used */
172+
if (applied_lora_tensors_count != total_lora_tensors_count) {
173+
LOG_WARN("Only (%lu / %lu) LoRA tensors have been applied",
174+
applied_lora_tensors_count, total_lora_tensors_count);
175+
} else {
176+
LOG_DEBUG("(%lu / %lu) LoRA tensors applied successfully",
177+
applied_lora_tensors_count, total_lora_tensors_count);
178+
}
163179

164180
return gf;
165181
}
166182

167183
void apply(std::map<std::string, struct ggml_tensor*> model_tensors, int n_threads) {
168184
auto get_graph = [&]() -> struct ggml_cgraph* {
169-
return build_graph(model_tensors);
185+
return build_lora_graph(model_tensors);
170186
};
171187
GGMLModule::compute(get_graph, n_threads, true);
172188
}
173189
};
174190

175-
#endif // __LORA_HPP__
191+
#endif // __LORA_HPP__

model.cpp

+22-1
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,23 @@ std::string convert_vae_decoder_name(const std::string& name) {
204204
return name;
205205
}
206206

207+
/* If not an SDXL LoRA, the "unet" prefix will have already been replaced by this
208+
* point, and "te2"/"te1" don't seem to appear in non-SDXL LoRAs — only "te_" */
209+
std::string convert_sdxl_lora_name(std::string tensor_name) {
210+
const std::pair<std::string, std::string> sdxl_lora_name_lookup[] = {
211+
{"unet", "model_diffusion_model"},
212+
{"te2", "cond_stage_model_1_transformer"},
213+
{"te1", "cond_stage_model_transformer"},
214+
};
215+
for (auto& pair_i : sdxl_lora_name_lookup) {
216+
if (tensor_name.compare(0, pair_i.first.length(), pair_i.first) == 0) {
217+
tensor_name = std::regex_replace(tensor_name, std::regex(pair_i.first), pair_i.second);
218+
break;
219+
}
220+
}
221+
return tensor_name;
222+
}
223+
207224
std::unordered_map<std::string, std::unordered_map<std::string, std::string>> suffix_conversion_underline = {
208225
{
209226
"attentions",
@@ -415,8 +432,12 @@ std::string convert_tensor_name(const std::string& name) {
415432
if (pos != std::string::npos) {
416433
std::string name_without_network_parts = name.substr(5, pos - 5);
417434
std::string network_part = name.substr(pos + 1);
435+
418436
// LOG_DEBUG("%s %s", name_without_network_parts.c_str(), network_part.c_str());
419437
std::string new_key = convert_diffusers_name_to_compvis(name_without_network_parts, '_');
438+
/* For dealing with the new SDXL LoRA tensor naming convention */
439+
new_key = convert_sdxl_lora_name(new_key);
440+
420441
if (new_key.empty()) {
421442
new_name = name;
422443
} else {
@@ -1641,4 +1662,4 @@ bool convert(const char* input_path, const char* vae_path, const char* output_pa
16411662
}
16421663
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
16431664
return success;
1644-
}
1665+
}

stable-diffusion.cpp

+13-7
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,16 @@ class StableDiffusionGGML {
122122
}
123123

124124
~StableDiffusionGGML() {
125+
if (clip_backend != backend) {
126+
ggml_backend_free(clip_backend);
127+
}
128+
if (control_net_backend != backend) {
129+
ggml_backend_free(control_net_backend);
130+
}
131+
if (vae_backend != backend) {
132+
ggml_backend_free(vae_backend);
133+
}
125134
ggml_backend_free(backend);
126-
ggml_backend_free(clip_backend);
127-
ggml_backend_free(control_net_backend);
128-
ggml_backend_free(vae_backend);
129135
}
130136

131137
bool load_from_file(const std::string& model_path,
@@ -521,9 +527,7 @@ class StableDiffusionGGML {
521527

522528
int64_t t1 = ggml_time_ms();
523529

524-
LOG_INFO("lora '%s' applied, taking %.2fs",
525-
lora_name.c_str(),
526-
(t1 - t0) * 1.0f / 1000);
530+
LOG_INFO("lora '%s' applied, taking %.2fs", lora_name.c_str(), (t1 - t0) * 1.0f / 1000);
527531
}
528532

529533
void apply_loras(const std::unordered_map<std::string, float>& lora_state) {
@@ -546,6 +550,8 @@ class StableDiffusionGGML {
546550
}
547551
}
548552

553+
LOG_INFO("Attempting to apply %lu LoRAs", lora_state.size());
554+
549555
for (auto& kv : lora_state_diff) {
550556
apply_lora(kv.first, kv.second);
551557
}
@@ -2109,4 +2115,4 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
21092115
LOG_INFO("img2vid completed in %.2fs", (t3 - t0) * 1.0f / 1000);
21102116

21112117
return result_images;
2112-
}
2118+
}

stable-diffusion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -201,4 +201,4 @@ SD_API uint8_t* preprocess_canny(uint8_t* img,
201201
}
202202
#endif
203203

204-
#endif // __STABLE_DIFFUSION_H__
204+
#endif // __STABLE_DIFFUSION_H__

util.cpp

+2-12
Original file line numberDiff line numberDiff line change
@@ -366,18 +366,8 @@ void log_printf(sd_log_level_t level, const char* file, int line, const char* fo
366366
va_list args;
367367
va_start(args, format);
368368

369-
const char* level_str = "DEBUG";
370-
if (level == SD_LOG_INFO) {
371-
level_str = "INFO ";
372-
} else if (level == SD_LOG_WARN) {
373-
level_str = "WARN ";
374-
} else if (level == SD_LOG_ERROR) {
375-
level_str = "ERROR";
376-
}
377-
378369
static char log_buffer[LOG_BUFFER_SIZE + 1];
379-
380-
int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "[%s] %s:%-4d - ", level_str, sd_basename(file).c_str(), line);
370+
int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
381371

382372
if (written >= 0 && written < LOG_BUFFER_SIZE) {
383373
vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
@@ -572,4 +562,4 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
572562
}
573563

574564
return result;
575-
}
565+
}

0 commit comments

Comments
 (0)